1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 #define IN_LIBXML
34 #include "libxml.h"
35 
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41 
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
/*
 * Forward declarations of file-local (static) routines, so that they can be
 * called before their definitions appear.
 */

/* Fatal error reporter keyed by an error code; defined further below. */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

/* NOTE(review): definition is not in the part of the file visible here. */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
	                  const xmlChar *base, xmlParserCtxtPtr pctx);

/* NOTE(review): definition is not in the part of the file visible here. */
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98 
99 /************************************************************************
100  *									*
101  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
102  *									*
103  ************************************************************************/
104 
/*
 * Size thresholds used by the entity expansion safety checks.
 * xmlParserEntityCheck() does not flag an entity whose reported size is
 * below XML_PARSER_BIG_ENTITY.
 * NOTE(review): XML_PARSER_LOT_ENTITY is not referenced in the part of the
 * file visible here -- confirm its role against its users.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
115 
116 /*
117  * xmlParserEntityCheck
118  *
119  * Function to check non-linear entity expansion behaviour
120  * This is here to detect and stop exponential linear entity expansion
121  * This is not a limitation of the parser but a safety
122  * boundary feature. It can be disabled with the XML_PARSE_HUGE
123  * parser option.
124  */
125 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127                      xmlEntityPtr ent, size_t replacement)
128 {
129     size_t consumed = 0;
130 
131     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132         return (0);
133     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134         return (1);
135 
136     /*
137      * This may look absurd but is needed to detect
138      * entities problems
139      */
140     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 	(ent->content != NULL) && (ent->checked == 0)) {
142 	unsigned long oldnbent = ctxt->nbentities;
143 	xmlChar *rep;
144 
145 	ent->checked = 1;
146 
147 	rep = xmlStringDecodeEntities(ctxt, ent->content,
148 				  XML_SUBSTITUTE_REF, 0, 0, 0);
149 
150 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
151 	if (rep != NULL) {
152 	    if (xmlStrchr(rep, '<'))
153 		ent->checked |= 1;
154 	    xmlFree(rep);
155 	    rep = NULL;
156 	}
157     }
158     if (replacement != 0) {
159 	if (replacement < XML_MAX_TEXT_LENGTH)
160 	    return(0);
161 
162         /*
163 	 * If the volume of entity copy reaches 10 times the
164 	 * amount of parsed data and over the large text threshold
165 	 * then that's very likely to be an abuse.
166 	 */
167         if (ctxt->input != NULL) {
168 	    consumed = ctxt->input->consumed +
169 	               (ctxt->input->cur - ctxt->input->base);
170 	}
171         consumed += ctxt->sizeentities;
172 
173         if (replacement < XML_PARSER_NON_LINEAR * consumed)
174 	    return(0);
175     } else if (size != 0) {
176         /*
177          * Do the check based on the replacement size of the entity
178          */
179         if (size < XML_PARSER_BIG_ENTITY)
180 	    return(0);
181 
182         /*
183          * A limit on the amount of text data reasonably used
184          */
185         if (ctxt->input != NULL) {
186             consumed = ctxt->input->consumed +
187                 (ctxt->input->cur - ctxt->input->base);
188         }
189         consumed += ctxt->sizeentities;
190 
191         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
192 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
193             return (0);
194     } else if (ent != NULL) {
195         /*
196          * use the number of parsed entities in the replacement
197          */
198         size = ent->checked / 2;
199 
200         /*
201          * The amount of data parsed counting entities size only once
202          */
203         if (ctxt->input != NULL) {
204             consumed = ctxt->input->consumed +
205                 (ctxt->input->cur - ctxt->input->base);
206         }
207         consumed += ctxt->sizeentities;
208 
209         /*
210          * Check the density of entities for the amount of data
211 	 * knowing an entity reference will take at least 3 bytes
212          */
213         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
214             return (0);
215     } else {
216         /*
217          * strange we got no data for checking
218          */
219 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
220 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
221 	    (ctxt->nbentities <= 10000))
222 	    return (0);
223     }
224     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
225     return (1);
226 }
227 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
237 
238 
239 
/*
 * Miscellaneous parser constants.
 * NOTE(review): SAX2, the two *_BUFFER_SIZE values and SAX_COMPAT_MODE are
 * not referenced in the part of the file visible here; confirm their exact
 * roles against their users.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
255 
/*
 * List of XML prefixed PI targets allowed by W3C specs.
 *
 * The list is terminated by a NULL entry. Both the strings and the array
 * slots are const: this is a fixed lookup table, so no entry can be
 * modified or re-pointed by accident.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
265 
266 
/*
 * Forward declarations of file-local (static) routines.
 * NOTE(review): none of the definitions are in the part of the file
 * visible here.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
292 
293 /************************************************************************
294  *									*
295  *		Some factorized error routines				*
296  *									*
297  ************************************************************************/
298 
299 /**
300  * xmlErrAttributeDup:
301  * @ctxt:  an XML parser context
302  * @prefix:  the attribute prefix
303  * @localname:  the attribute localname
304  *
305  * Handle a redefinition of attribute error
306  */
307 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)308 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
309                    const xmlChar * localname)
310 {
311     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312         (ctxt->instate == XML_PARSER_EOF))
313 	return;
314     if (ctxt != NULL)
315 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
316 
317     if (prefix == NULL)
318         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
319                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
320                         (const char *) localname, NULL, NULL, 0, 0,
321                         "Attribute %s redefined\n", localname);
322     else
323         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
324                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
325                         (const char *) prefix, (const char *) localname,
326                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
327                         localname);
328     if (ctxt != NULL) {
329 	ctxt->wellFormed = 0;
330 	if (ctxt->recovery == 0)
331 	    ctxt->disableSAX = 1;
332     }
333 }
334 
335 /**
336  * xmlFatalErr:
337  * @ctxt:  an XML parser context
338  * @error:  the error number
339  * @extra:  extra information string
340  *
341  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
342  */
343 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)344 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
345 {
346     const char *errmsg;
347     char errstr[129] = "";
348 
349     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350         (ctxt->instate == XML_PARSER_EOF))
351 	return;
352     switch (error) {
353         case XML_ERR_INVALID_HEX_CHARREF:
354             errmsg = "CharRef: invalid hexadecimal value";
355             break;
356         case XML_ERR_INVALID_DEC_CHARREF:
357             errmsg = "CharRef: invalid decimal value";
358             break;
359         case XML_ERR_INVALID_CHARREF:
360             errmsg = "CharRef: invalid value";
361             break;
362         case XML_ERR_INTERNAL_ERROR:
363             errmsg = "internal error";
364             break;
365         case XML_ERR_PEREF_AT_EOF:
366             errmsg = "PEReference at end of document";
367             break;
368         case XML_ERR_PEREF_IN_PROLOG:
369             errmsg = "PEReference in prolog";
370             break;
371         case XML_ERR_PEREF_IN_EPILOG:
372             errmsg = "PEReference in epilog";
373             break;
374         case XML_ERR_PEREF_NO_NAME:
375             errmsg = "PEReference: no name";
376             break;
377         case XML_ERR_PEREF_SEMICOL_MISSING:
378             errmsg = "PEReference: expecting ';'";
379             break;
380         case XML_ERR_ENTITY_LOOP:
381             errmsg = "Detected an entity reference loop";
382             break;
383         case XML_ERR_ENTITY_NOT_STARTED:
384             errmsg = "EntityValue: \" or ' expected";
385             break;
386         case XML_ERR_ENTITY_PE_INTERNAL:
387             errmsg = "PEReferences forbidden in internal subset";
388             break;
389         case XML_ERR_ENTITY_NOT_FINISHED:
390             errmsg = "EntityValue: \" or ' expected";
391             break;
392         case XML_ERR_ATTRIBUTE_NOT_STARTED:
393             errmsg = "AttValue: \" or ' expected";
394             break;
395         case XML_ERR_LT_IN_ATTRIBUTE:
396             errmsg = "Unescaped '<' not allowed in attributes values";
397             break;
398         case XML_ERR_LITERAL_NOT_STARTED:
399             errmsg = "SystemLiteral \" or ' expected";
400             break;
401         case XML_ERR_LITERAL_NOT_FINISHED:
402             errmsg = "Unfinished System or Public ID \" or ' expected";
403             break;
404         case XML_ERR_MISPLACED_CDATA_END:
405             errmsg = "Sequence ']]>' not allowed in content";
406             break;
407         case XML_ERR_URI_REQUIRED:
408             errmsg = "SYSTEM or PUBLIC, the URI is missing";
409             break;
410         case XML_ERR_PUBID_REQUIRED:
411             errmsg = "PUBLIC, the Public Identifier is missing";
412             break;
413         case XML_ERR_HYPHEN_IN_COMMENT:
414             errmsg = "Comment must not contain '--' (double-hyphen)";
415             break;
416         case XML_ERR_PI_NOT_STARTED:
417             errmsg = "xmlParsePI : no target name";
418             break;
419         case XML_ERR_RESERVED_XML_NAME:
420             errmsg = "Invalid PI name";
421             break;
422         case XML_ERR_NOTATION_NOT_STARTED:
423             errmsg = "NOTATION: Name expected here";
424             break;
425         case XML_ERR_NOTATION_NOT_FINISHED:
426             errmsg = "'>' required to close NOTATION declaration";
427             break;
428         case XML_ERR_VALUE_REQUIRED:
429             errmsg = "Entity value required";
430             break;
431         case XML_ERR_URI_FRAGMENT:
432             errmsg = "Fragment not allowed";
433             break;
434         case XML_ERR_ATTLIST_NOT_STARTED:
435             errmsg = "'(' required to start ATTLIST enumeration";
436             break;
437         case XML_ERR_NMTOKEN_REQUIRED:
438             errmsg = "NmToken expected in ATTLIST enumeration";
439             break;
440         case XML_ERR_ATTLIST_NOT_FINISHED:
441             errmsg = "')' required to finish ATTLIST enumeration";
442             break;
443         case XML_ERR_MIXED_NOT_STARTED:
444             errmsg = "MixedContentDecl : '|' or ')*' expected";
445             break;
446         case XML_ERR_PCDATA_REQUIRED:
447             errmsg = "MixedContentDecl : '#PCDATA' expected";
448             break;
449         case XML_ERR_ELEMCONTENT_NOT_STARTED:
450             errmsg = "ContentDecl : Name or '(' expected";
451             break;
452         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
453             errmsg = "ContentDecl : ',' '|' or ')' expected";
454             break;
455         case XML_ERR_PEREF_IN_INT_SUBSET:
456             errmsg =
457                 "PEReference: forbidden within markup decl in internal subset";
458             break;
459         case XML_ERR_GT_REQUIRED:
460             errmsg = "expected '>'";
461             break;
462         case XML_ERR_CONDSEC_INVALID:
463             errmsg = "XML conditional section '[' expected";
464             break;
465         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
466             errmsg = "Content error in the external subset";
467             break;
468         case XML_ERR_CONDSEC_INVALID_KEYWORD:
469             errmsg =
470                 "conditional section INCLUDE or IGNORE keyword expected";
471             break;
472         case XML_ERR_CONDSEC_NOT_FINISHED:
473             errmsg = "XML conditional section not closed";
474             break;
475         case XML_ERR_XMLDECL_NOT_STARTED:
476             errmsg = "Text declaration '<?xml' required";
477             break;
478         case XML_ERR_XMLDECL_NOT_FINISHED:
479             errmsg = "parsing XML declaration: '?>' expected";
480             break;
481         case XML_ERR_EXT_ENTITY_STANDALONE:
482             errmsg = "external parsed entities cannot be standalone";
483             break;
484         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
485             errmsg = "EntityRef: expecting ';'";
486             break;
487         case XML_ERR_DOCTYPE_NOT_FINISHED:
488             errmsg = "DOCTYPE improperly terminated";
489             break;
490         case XML_ERR_LTSLASH_REQUIRED:
491             errmsg = "EndTag: '</' not found";
492             break;
493         case XML_ERR_EQUAL_REQUIRED:
494             errmsg = "expected '='";
495             break;
496         case XML_ERR_STRING_NOT_CLOSED:
497             errmsg = "String not closed expecting \" or '";
498             break;
499         case XML_ERR_STRING_NOT_STARTED:
500             errmsg = "String not started expecting ' or \"";
501             break;
502         case XML_ERR_ENCODING_NAME:
503             errmsg = "Invalid XML encoding name";
504             break;
505         case XML_ERR_STANDALONE_VALUE:
506             errmsg = "standalone accepts only 'yes' or 'no'";
507             break;
508         case XML_ERR_DOCUMENT_EMPTY:
509             errmsg = "Document is empty";
510             break;
511         case XML_ERR_DOCUMENT_END:
512             errmsg = "Extra content at the end of the document";
513             break;
514         case XML_ERR_NOT_WELL_BALANCED:
515             errmsg = "chunk is not well balanced";
516             break;
517         case XML_ERR_EXTRA_CONTENT:
518             errmsg = "extra content at the end of well balanced chunk";
519             break;
520         case XML_ERR_VERSION_MISSING:
521             errmsg = "Malformed declaration expecting version";
522             break;
523         case XML_ERR_NAME_TOO_LONG:
524             errmsg = "Name too long use XML_PARSE_HUGE option";
525             break;
526 #if 0
527         case:
528             errmsg = "";
529             break;
530 #endif
531         default:
532             errmsg = "Unregistered error message";
533     }
534     if (info == NULL)
535         snprintf(errstr, 128, "%s\n", errmsg);
536     else
537         snprintf(errstr, 128, "%s: %%s\n", errmsg);
538     if (ctxt != NULL)
539 	ctxt->errNo = error;
540     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
541                     XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
542                     info);
543     if (ctxt != NULL) {
544 	ctxt->wellFormed = 0;
545 	if (ctxt->recovery == 0)
546 	    ctxt->disableSAX = 1;
547     }
548 }
549 
550 /**
551  * xmlFatalErrMsg:
552  * @ctxt:  an XML parser context
553  * @error:  the error number
554  * @msg:  the error message
555  *
556  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
557  */
558 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)559 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560                const char *msg)
561 {
562     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
563         (ctxt->instate == XML_PARSER_EOF))
564 	return;
565     if (ctxt != NULL)
566 	ctxt->errNo = error;
567     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
568                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
569     if (ctxt != NULL) {
570 	ctxt->wellFormed = 0;
571 	if (ctxt->recovery == 0)
572 	    ctxt->disableSAX = 1;
573     }
574 }
575 
576 /**
577  * xmlWarningMsg:
578  * @ctxt:  an XML parser context
579  * @error:  the error number
580  * @msg:  the error message
581  * @str1:  extra data
582  * @str2:  extra data
583  *
584  * Handle a warning.
585  */
586 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)587 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
588               const char *msg, const xmlChar *str1, const xmlChar *str2)
589 {
590     xmlStructuredErrorFunc schannel = NULL;
591 
592     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
593         (ctxt->instate == XML_PARSER_EOF))
594 	return;
595     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
596         (ctxt->sax->initialized == XML_SAX2_MAGIC))
597         schannel = ctxt->sax->serror;
598     if (ctxt != NULL) {
599         __xmlRaiseError(schannel,
600                     (ctxt->sax) ? ctxt->sax->warning : NULL,
601                     ctxt->userData,
602                     ctxt, NULL, XML_FROM_PARSER, error,
603                     XML_ERR_WARNING, NULL, 0,
604 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
605 		    msg, (const char *) str1, (const char *) str2);
606     } else {
607         __xmlRaiseError(schannel, NULL, NULL,
608                     ctxt, NULL, XML_FROM_PARSER, error,
609                     XML_ERR_WARNING, NULL, 0,
610 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
611 		    msg, (const char *) str1, (const char *) str2);
612     }
613 }
614 
615 /**
616  * xmlValidityError:
617  * @ctxt:  an XML parser context
618  * @error:  the error number
619  * @msg:  the error message
620  * @str1:  extra data
621  *
622  * Handle a validity error.
623  */
624 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)625 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
626               const char *msg, const xmlChar *str1, const xmlChar *str2)
627 {
628     xmlStructuredErrorFunc schannel = NULL;
629 
630     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
631         (ctxt->instate == XML_PARSER_EOF))
632 	return;
633     if (ctxt != NULL) {
634 	ctxt->errNo = error;
635 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
636 	    schannel = ctxt->sax->serror;
637     }
638     if (ctxt != NULL) {
639         __xmlRaiseError(schannel,
640                     ctxt->vctxt.error, ctxt->vctxt.userData,
641                     ctxt, NULL, XML_FROM_DTD, error,
642                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
643 		    (const char *) str2, NULL, 0, 0,
644 		    msg, (const char *) str1, (const char *) str2);
645 	ctxt->valid = 0;
646     } else {
647         __xmlRaiseError(schannel, NULL, NULL,
648                     ctxt, NULL, XML_FROM_DTD, error,
649                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 		    (const char *) str2, NULL, 0, 0,
651 		    msg, (const char *) str1, (const char *) str2);
652     }
653 }
654 
655 /**
656  * xmlFatalErrMsgInt:
657  * @ctxt:  an XML parser context
658  * @error:  the error number
659  * @msg:  the error message
660  * @val:  an integer value
661  *
662  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
663  */
664 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)665 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
666                   const char *msg, int val)
667 {
668     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
669         (ctxt->instate == XML_PARSER_EOF))
670 	return;
671     if (ctxt != NULL)
672 	ctxt->errNo = error;
673     __xmlRaiseError(NULL, NULL, NULL,
674                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
675                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
676     if (ctxt != NULL) {
677 	ctxt->wellFormed = 0;
678 	if (ctxt->recovery == 0)
679 	    ctxt->disableSAX = 1;
680     }
681 }
682 
683 /**
684  * xmlFatalErrMsgStrIntStr:
685  * @ctxt:  an XML parser context
686  * @error:  the error number
687  * @msg:  the error message
688  * @str1:  an string info
689  * @val:  an integer value
690  * @str2:  an string info
691  *
692  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
693  */
694 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)695 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
696                   const char *msg, const xmlChar *str1, int val,
697 		  const xmlChar *str2)
698 {
699     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
700         (ctxt->instate == XML_PARSER_EOF))
701 	return;
702     if (ctxt != NULL)
703 	ctxt->errNo = error;
704     __xmlRaiseError(NULL, NULL, NULL,
705                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
706                     NULL, 0, (const char *) str1, (const char *) str2,
707 		    NULL, val, 0, msg, str1, val, str2);
708     if (ctxt != NULL) {
709 	ctxt->wellFormed = 0;
710 	if (ctxt->recovery == 0)
711 	    ctxt->disableSAX = 1;
712     }
713 }
714 
715 /**
716  * xmlFatalErrMsgStr:
717  * @ctxt:  an XML parser context
718  * @error:  the error number
719  * @msg:  the error message
720  * @val:  a string value
721  *
722  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
723  */
724 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)725 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
726                   const char *msg, const xmlChar * val)
727 {
728     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
729         (ctxt->instate == XML_PARSER_EOF))
730 	return;
731     if (ctxt != NULL)
732 	ctxt->errNo = error;
733     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
734                     XML_FROM_PARSER, error, XML_ERR_FATAL,
735                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
736                     val);
737     if (ctxt != NULL) {
738 	ctxt->wellFormed = 0;
739 	if (ctxt->recovery == 0)
740 	    ctxt->disableSAX = 1;
741     }
742 }
743 
744 /**
745  * xmlErrMsgStr:
746  * @ctxt:  an XML parser context
747  * @error:  the error number
748  * @msg:  the error message
749  * @val:  a string value
750  *
751  * Handle a non fatal parser error
752  */
753 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)754 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
755                   const char *msg, const xmlChar * val)
756 {
757     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
758         (ctxt->instate == XML_PARSER_EOF))
759 	return;
760     if (ctxt != NULL)
761 	ctxt->errNo = error;
762     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
763                     XML_FROM_PARSER, error, XML_ERR_ERROR,
764                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
765                     val);
766 }
767 
768 /**
769  * xmlNsErr:
770  * @ctxt:  an XML parser context
771  * @error:  the error number
772  * @msg:  the message
773  * @info1:  extra information string
774  * @info2:  extra information string
775  *
776  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
777  */
778 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)779 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
780          const char *msg,
781          const xmlChar * info1, const xmlChar * info2,
782          const xmlChar * info3)
783 {
784     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
785         (ctxt->instate == XML_PARSER_EOF))
786 	return;
787     if (ctxt != NULL)
788 	ctxt->errNo = error;
789     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
790                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
791                     (const char *) info2, (const char *) info3, 0, 0, msg,
792                     info1, info2, info3);
793     if (ctxt != NULL)
794 	ctxt->nsWellFormed = 0;
795 }
796 
797 /**
798  * xmlNsWarn
799  * @ctxt:  an XML parser context
800  * @error:  the error number
801  * @msg:  the message
802  * @info1:  extra information string
803  * @info2:  extra information string
804  *
805  * Handle a namespace warning error
806  */
807 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)808 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
809          const char *msg,
810          const xmlChar * info1, const xmlChar * info2,
811          const xmlChar * info3)
812 {
813     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
814         (ctxt->instate == XML_PARSER_EOF))
815 	return;
816     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
817                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
818                     (const char *) info2, (const char *) info3, 0, 0, msg,
819                     info1, info2, info3);
820 }
821 
822 /************************************************************************
823  *									*
824  *		Library wide options					*
825  *									*
826  ************************************************************************/
827 
/**
  * xmlHasFeature:
  * @feature: the feature to be examined
  *
  * Examines if the library has been compiled with a given feature.
  *
  * Returns a non-zero value if the feature exists, or zero (0) if the
  * feature does not exist or an unknown feature is requested.
  */
838 int
xmlHasFeature(xmlFeature feature)839 xmlHasFeature(xmlFeature feature)
840 {
841     switch (feature) {
842 	case XML_WITH_THREAD:
843 #ifdef LIBXML_THREAD_ENABLED
844 	    return(1);
845 #else
846 	    return(0);
847 #endif
848         case XML_WITH_TREE:
849 #ifdef LIBXML_TREE_ENABLED
850             return(1);
851 #else
852             return(0);
853 #endif
854         case XML_WITH_OUTPUT:
855 #ifdef LIBXML_OUTPUT_ENABLED
856             return(1);
857 #else
858             return(0);
859 #endif
860         case XML_WITH_PUSH:
861 #ifdef LIBXML_PUSH_ENABLED
862             return(1);
863 #else
864             return(0);
865 #endif
866         case XML_WITH_READER:
867 #ifdef LIBXML_READER_ENABLED
868             return(1);
869 #else
870             return(0);
871 #endif
872         case XML_WITH_PATTERN:
873 #ifdef LIBXML_PATTERN_ENABLED
874             return(1);
875 #else
876             return(0);
877 #endif
878         case XML_WITH_WRITER:
879 #ifdef LIBXML_WRITER_ENABLED
880             return(1);
881 #else
882             return(0);
883 #endif
884         case XML_WITH_SAX1:
885 #ifdef LIBXML_SAX1_ENABLED
886             return(1);
887 #else
888             return(0);
889 #endif
890         case XML_WITH_FTP:
891 #ifdef LIBXML_FTP_ENABLED
892             return(1);
893 #else
894             return(0);
895 #endif
896         case XML_WITH_HTTP:
897 #ifdef LIBXML_HTTP_ENABLED
898             return(1);
899 #else
900             return(0);
901 #endif
902         case XML_WITH_VALID:
903 #ifdef LIBXML_VALID_ENABLED
904             return(1);
905 #else
906             return(0);
907 #endif
908         case XML_WITH_HTML:
909 #ifdef LIBXML_HTML_ENABLED
910             return(1);
911 #else
912             return(0);
913 #endif
914         case XML_WITH_LEGACY:
915 #ifdef LIBXML_LEGACY_ENABLED
916             return(1);
917 #else
918             return(0);
919 #endif
920         case XML_WITH_C14N:
921 #ifdef LIBXML_C14N_ENABLED
922             return(1);
923 #else
924             return(0);
925 #endif
926         case XML_WITH_CATALOG:
927 #ifdef LIBXML_CATALOG_ENABLED
928             return(1);
929 #else
930             return(0);
931 #endif
932         case XML_WITH_XPATH:
933 #ifdef LIBXML_XPATH_ENABLED
934             return(1);
935 #else
936             return(0);
937 #endif
938         case XML_WITH_XPTR:
939 #ifdef LIBXML_XPTR_ENABLED
940             return(1);
941 #else
942             return(0);
943 #endif
944         case XML_WITH_XINCLUDE:
945 #ifdef LIBXML_XINCLUDE_ENABLED
946             return(1);
947 #else
948             return(0);
949 #endif
950         case XML_WITH_ICONV:
951 #ifdef LIBXML_ICONV_ENABLED
952             return(1);
953 #else
954             return(0);
955 #endif
956         case XML_WITH_ISO8859X:
957 #ifdef LIBXML_ISO8859X_ENABLED
958             return(1);
959 #else
960             return(0);
961 #endif
962         case XML_WITH_UNICODE:
963 #ifdef LIBXML_UNICODE_ENABLED
964             return(1);
965 #else
966             return(0);
967 #endif
968         case XML_WITH_REGEXP:
969 #ifdef LIBXML_REGEXP_ENABLED
970             return(1);
971 #else
972             return(0);
973 #endif
974         case XML_WITH_AUTOMATA:
975 #ifdef LIBXML_AUTOMATA_ENABLED
976             return(1);
977 #else
978             return(0);
979 #endif
980         case XML_WITH_EXPR:
981 #ifdef LIBXML_EXPR_ENABLED
982             return(1);
983 #else
984             return(0);
985 #endif
986         case XML_WITH_SCHEMAS:
987 #ifdef LIBXML_SCHEMAS_ENABLED
988             return(1);
989 #else
990             return(0);
991 #endif
992         case XML_WITH_SCHEMATRON:
993 #ifdef LIBXML_SCHEMATRON_ENABLED
994             return(1);
995 #else
996             return(0);
997 #endif
998         case XML_WITH_MODULES:
999 #ifdef LIBXML_MODULES_ENABLED
1000             return(1);
1001 #else
1002             return(0);
1003 #endif
1004         case XML_WITH_DEBUG:
1005 #ifdef LIBXML_DEBUG_ENABLED
1006             return(1);
1007 #else
1008             return(0);
1009 #endif
1010         case XML_WITH_DEBUG_MEM:
1011 #ifdef DEBUG_MEMORY_LOCATION
1012             return(1);
1013 #else
1014             return(0);
1015 #endif
1016         case XML_WITH_DEBUG_RUN:
1017 #ifdef LIBXML_DEBUG_RUNTIME
1018             return(1);
1019 #else
1020             return(0);
1021 #endif
1022         case XML_WITH_ZLIB:
1023 #ifdef LIBXML_ZLIB_ENABLED
1024             return(1);
1025 #else
1026             return(0);
1027 #endif
1028         case XML_WITH_LZMA:
1029 #ifdef LIBXML_LZMA_ENABLED
1030             return(1);
1031 #else
1032             return(0);
1033 #endif
1034         case XML_WITH_ICU:
1035 #ifdef LIBXML_ICU_ENABLED
1036             return(1);
1037 #else
1038             return(0);
1039 #endif
1040         default:
1041 	    break;
1042      }
1043      return(0);
1044 }
1045 
1046 /************************************************************************
1047  *									*
1048  *		SAX2 defaulted attributes handling			*
1049  *									*
1050  ************************************************************************/
1051 
1052 /**
1053  * xmlDetectSAX2:
1054  * @ctxt:  an XML parser context
1055  *
1056  * Do the SAX2 detection and specific intialization
1057  */
1058 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1059 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1060     if (ctxt == NULL) return;
1061 #ifdef LIBXML_SAX1_ENABLED
1062     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1063         ((ctxt->sax->startElementNs != NULL) ||
1064          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1065 #else
1066     ctxt->sax2 = 1;
1067 #endif /* LIBXML_SAX1_ENABLED */
1068 
1069     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1070     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1071     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1072     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1073 		(ctxt->str_xml_ns == NULL)) {
1074         xmlErrMemory(ctxt, NULL);
1075     }
1076 }
1077 
1078 typedef struct _xmlDefAttrs xmlDefAttrs;
1079 typedef xmlDefAttrs *xmlDefAttrsPtr;
1080 struct _xmlDefAttrs {
1081     int nbAttrs;	/* number of defaulted attributes on that element */
1082     int maxAttrs;       /* the size of the array */
1083     const xmlChar *values[5]; /* array of localname/prefix/values/external */
1084 };
1085 
1086 /**
1087  * xmlAttrNormalizeSpace:
1088  * @src: the source string
1089  * @dst: the target string
1090  *
1091  * Normalize the space in non CDATA attribute values:
1092  * If the attribute type is not CDATA, then the XML processor MUST further
1093  * process the normalized attribute value by discarding any leading and
1094  * trailing space (#x20) characters, and by replacing sequences of space
1095  * (#x20) characters by a single space (#x20) character.
1096  * Note that the size of dst need to be at least src, and if one doesn't need
1097  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1098  * passing src as dst is just fine.
1099  *
1100  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1101  *         is needed.
1102  */
1103 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1104 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1105 {
1106     if ((src == NULL) || (dst == NULL))
1107         return(NULL);
1108 
1109     while (*src == 0x20) src++;
1110     while (*src != 0) {
1111 	if (*src == 0x20) {
1112 	    while (*src == 0x20) src++;
1113 	    if (*src != 0)
1114 		*dst++ = 0x20;
1115 	} else {
1116 	    *dst++ = *src++;
1117 	}
1118     }
1119     *dst = 0;
1120     if (dst == src)
1121        return(NULL);
1122     return(dst);
1123 }
1124 
1125 /**
1126  * xmlAttrNormalizeSpace2:
1127  * @src: the source string
1128  *
1129  * Normalize the space in non CDATA attribute values, a slightly more complex
1130  * front end to avoid allocation problems when running on attribute values
1131  * coming from the input.
1132  *
1133  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1134  *         is needed.
1135  */
1136 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1137 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1138 {
1139     int i;
1140     int remove_head = 0;
1141     int need_realloc = 0;
1142     const xmlChar *cur;
1143 
1144     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1145         return(NULL);
1146     i = *len;
1147     if (i <= 0)
1148         return(NULL);
1149 
1150     cur = src;
1151     while (*cur == 0x20) {
1152         cur++;
1153 	remove_head++;
1154     }
1155     while (*cur != 0) {
1156 	if (*cur == 0x20) {
1157 	    cur++;
1158 	    if ((*cur == 0x20) || (*cur == 0)) {
1159 	        need_realloc = 1;
1160 		break;
1161 	    }
1162 	} else
1163 	    cur++;
1164     }
1165     if (need_realloc) {
1166         xmlChar *ret;
1167 
1168 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1169 	if (ret == NULL) {
1170 	    xmlErrMemory(ctxt, NULL);
1171 	    return(NULL);
1172 	}
1173 	xmlAttrNormalizeSpace(ret, ret);
1174 	*len = (int) strlen((const char *)ret);
1175         return(ret);
1176     } else if (remove_head) {
1177         *len -= remove_head;
1178         memmove(src, src + remove_head, 1 + *len);
1179 	return(src);
1180     }
1181     return(NULL);
1182 }
1183 
1184 /**
1185  * xmlAddDefAttrs:
1186  * @ctxt:  an XML parser context
1187  * @fullname:  the element fullname
1188  * @fullattr:  the attribute fullname
1189  * @value:  the attribute value
1190  *
1191  * Add a defaulted attribute for an element
1192  */
1193 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1194 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1195                const xmlChar *fullname,
1196                const xmlChar *fullattr,
1197                const xmlChar *value) {
1198     xmlDefAttrsPtr defaults;
1199     int len;
1200     const xmlChar *name;
1201     const xmlChar *prefix;
1202 
1203     /*
1204      * Allows to detect attribute redefinitions
1205      */
1206     if (ctxt->attsSpecial != NULL) {
1207         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1208 	    return;
1209     }
1210 
1211     if (ctxt->attsDefault == NULL) {
1212         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1213 	if (ctxt->attsDefault == NULL)
1214 	    goto mem_error;
1215     }
1216 
1217     /*
1218      * split the element name into prefix:localname , the string found
1219      * are within the DTD and then not associated to namespace names.
1220      */
1221     name = xmlSplitQName3(fullname, &len);
1222     if (name == NULL) {
1223         name = xmlDictLookup(ctxt->dict, fullname, -1);
1224 	prefix = NULL;
1225     } else {
1226         name = xmlDictLookup(ctxt->dict, name, -1);
1227 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1228     }
1229 
1230     /*
1231      * make sure there is some storage
1232      */
1233     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1234     if (defaults == NULL) {
1235         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1236 	                   (4 * 5) * sizeof(const xmlChar *));
1237 	if (defaults == NULL)
1238 	    goto mem_error;
1239 	defaults->nbAttrs = 0;
1240 	defaults->maxAttrs = 4;
1241 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1242 	                        defaults, NULL) < 0) {
1243 	    xmlFree(defaults);
1244 	    goto mem_error;
1245 	}
1246     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1247         xmlDefAttrsPtr temp;
1248 
1249         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1250 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1251 	if (temp == NULL)
1252 	    goto mem_error;
1253 	defaults = temp;
1254 	defaults->maxAttrs *= 2;
1255 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1256 	                        defaults, NULL) < 0) {
1257 	    xmlFree(defaults);
1258 	    goto mem_error;
1259 	}
1260     }
1261 
1262     /*
1263      * Split the element name into prefix:localname , the string found
1264      * are within the DTD and hen not associated to namespace names.
1265      */
1266     name = xmlSplitQName3(fullattr, &len);
1267     if (name == NULL) {
1268         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1269 	prefix = NULL;
1270     } else {
1271         name = xmlDictLookup(ctxt->dict, name, -1);
1272 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1273     }
1274 
1275     defaults->values[5 * defaults->nbAttrs] = name;
1276     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1277     /* intern the string and precompute the end */
1278     len = xmlStrlen(value);
1279     value = xmlDictLookup(ctxt->dict, value, len);
1280     defaults->values[5 * defaults->nbAttrs + 2] = value;
1281     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1282     if (ctxt->external)
1283         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1284     else
1285         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1286     defaults->nbAttrs++;
1287 
1288     return;
1289 
1290 mem_error:
1291     xmlErrMemory(ctxt, NULL);
1292     return;
1293 }
1294 
1295 /**
1296  * xmlAddSpecialAttr:
1297  * @ctxt:  an XML parser context
1298  * @fullname:  the element fullname
1299  * @fullattr:  the attribute fullname
1300  * @type:  the attribute type
1301  *
1302  * Register this attribute type
1303  */
1304 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1305 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1306 		  const xmlChar *fullname,
1307 		  const xmlChar *fullattr,
1308 		  int type)
1309 {
1310     if (ctxt->attsSpecial == NULL) {
1311         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1312 	if (ctxt->attsSpecial == NULL)
1313 	    goto mem_error;
1314     }
1315 
1316     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1317         return;
1318 
1319     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1320                      (void *) (long) type);
1321     return;
1322 
1323 mem_error:
1324     xmlErrMemory(ctxt, NULL);
1325     return;
1326 }
1327 
1328 /**
1329  * xmlCleanSpecialAttrCallback:
1330  *
1331  * Removes CDATA attributes from the special attribute table
1332  */
1333 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1334 xmlCleanSpecialAttrCallback(void *payload, void *data,
1335                             const xmlChar *fullname, const xmlChar *fullattr,
1336                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1337     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1338 
1339     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1340         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1341     }
1342 }
1343 
1344 /**
1345  * xmlCleanSpecialAttr:
1346  * @ctxt:  an XML parser context
1347  *
1348  * Trim the list of attributes defined to remove all those of type
1349  * CDATA as they are not special. This call should be done when finishing
1350  * to parse the DTD and before starting to parse the document root.
1351  */
1352 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1353 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1354 {
1355     if (ctxt->attsSpecial == NULL)
1356         return;
1357 
1358     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1359 
1360     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1361         xmlHashFree(ctxt->attsSpecial, NULL);
1362         ctxt->attsSpecial = NULL;
1363     }
1364     return;
1365 }
1366 
1367 /**
1368  * xmlCheckLanguageID:
1369  * @lang:  pointer to the string value
1370  *
1371  * Checks that the value conforms to the LanguageID production:
1372  *
1373  * NOTE: this is somewhat deprecated, those productions were removed from
1374  *       the XML Second edition.
1375  *
1376  * [33] LanguageID ::= Langcode ('-' Subcode)*
1377  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1378  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1379  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1380  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1381  * [38] Subcode ::= ([a-z] | [A-Z])+
1382  *
1383  * The current REC reference the sucessors of RFC 1766, currently 5646
1384  *
1385  * http://www.rfc-editor.org/rfc/rfc5646.txt
1386  * langtag       = language
1387  *                 ["-" script]
1388  *                 ["-" region]
1389  *                 *("-" variant)
1390  *                 *("-" extension)
1391  *                 ["-" privateuse]
1392  * language      = 2*3ALPHA            ; shortest ISO 639 code
1393  *                 ["-" extlang]       ; sometimes followed by
1394  *                                     ; extended language subtags
1395  *               / 4ALPHA              ; or reserved for future use
1396  *               / 5*8ALPHA            ; or registered language subtag
1397  *
1398  * extlang       = 3ALPHA              ; selected ISO 639 codes
1399  *                 *2("-" 3ALPHA)      ; permanently reserved
1400  *
1401  * script        = 4ALPHA              ; ISO 15924 code
1402  *
1403  * region        = 2ALPHA              ; ISO 3166-1 code
1404  *               / 3DIGIT              ; UN M.49 code
1405  *
1406  * variant       = 5*8alphanum         ; registered variants
1407  *               / (DIGIT 3alphanum)
1408  *
1409  * extension     = singleton 1*("-" (2*8alphanum))
1410  *
1411  *                                     ; Single alphanumerics
1412  *                                     ; "x" reserved for private use
1413  * singleton     = DIGIT               ; 0 - 9
1414  *               / %x41-57             ; A - W
1415  *               / %x59-5A             ; Y - Z
1416  *               / %x61-77             ; a - w
1417  *               / %x79-7A             ; y - z
1418  *
1419  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1420  * The parser below doesn't try to cope with extension or privateuse
1421  * that could be added but that's not interoperable anyway
1422  *
1423  * Returns 1 if correct 0 otherwise
1424  **/
1425 int
xmlCheckLanguageID(const xmlChar * lang)1426 xmlCheckLanguageID(const xmlChar * lang)
1427 {
1428     const xmlChar *cur = lang, *nxt;
1429 
1430     if (cur == NULL)
1431         return (0);
1432     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1433         ((cur[0] == 'I') && (cur[1] == '-')) ||
1434         ((cur[0] == 'x') && (cur[1] == '-')) ||
1435         ((cur[0] == 'X') && (cur[1] == '-'))) {
1436         /*
1437          * Still allow IANA code and user code which were coming
1438          * from the previous version of the XML-1.0 specification
1439          * it's deprecated but we should not fail
1440          */
1441         cur += 2;
1442         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1443                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1444             cur++;
1445         return(cur[0] == 0);
1446     }
1447     nxt = cur;
1448     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1449            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1450            nxt++;
1451     if (nxt - cur >= 4) {
1452         /*
1453          * Reserved
1454          */
1455         if ((nxt - cur > 8) || (nxt[0] != 0))
1456             return(0);
1457         return(1);
1458     }
1459     if (nxt - cur < 2)
1460         return(0);
1461     /* we got an ISO 639 code */
1462     if (nxt[0] == 0)
1463         return(1);
1464     if (nxt[0] != '-')
1465         return(0);
1466 
1467     nxt++;
1468     cur = nxt;
1469     /* now we can have extlang or script or region or variant */
1470     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1471         goto region_m49;
1472 
1473     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1475            nxt++;
1476     if (nxt - cur == 4)
1477         goto script;
1478     if (nxt - cur == 2)
1479         goto region;
1480     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1481         goto variant;
1482     if (nxt - cur != 3)
1483         return(0);
1484     /* we parsed an extlang */
1485     if (nxt[0] == 0)
1486         return(1);
1487     if (nxt[0] != '-')
1488         return(0);
1489 
1490     nxt++;
1491     cur = nxt;
1492     /* now we can have script or region or variant */
1493     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1494         goto region_m49;
1495 
1496     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498            nxt++;
1499     if (nxt - cur == 2)
1500         goto region;
1501     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502         goto variant;
1503     if (nxt - cur != 4)
1504         return(0);
1505     /* we parsed a script */
1506 script:
1507     if (nxt[0] == 0)
1508         return(1);
1509     if (nxt[0] != '-')
1510         return(0);
1511 
1512     nxt++;
1513     cur = nxt;
1514     /* now we can have region or variant */
1515     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1516         goto region_m49;
1517 
1518     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1519            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1520            nxt++;
1521 
1522     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523         goto variant;
1524     if (nxt - cur != 2)
1525         return(0);
1526     /* we parsed a region */
1527 region:
1528     if (nxt[0] == 0)
1529         return(1);
1530     if (nxt[0] != '-')
1531         return(0);
1532 
1533     nxt++;
1534     cur = nxt;
1535     /* now we can just have a variant */
1536     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538            nxt++;
1539 
1540     if ((nxt - cur < 5) || (nxt - cur > 8))
1541         return(0);
1542 
1543     /* we parsed a variant */
1544 variant:
1545     if (nxt[0] == 0)
1546         return(1);
1547     if (nxt[0] != '-')
1548         return(0);
1549     /* extensions and private use subtags not checked */
1550     return (1);
1551 
1552 region_m49:
1553     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1554         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1555         nxt += 3;
1556         goto region;
1557     }
1558     return(0);
1559 }
1560 
1561 /************************************************************************
1562  *									*
1563  *		Parser stacks related functions and macros		*
1564  *									*
1565  ************************************************************************/
1566 
1567 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1568                                             const xmlChar ** str);
1569 
1570 #ifdef SAX2
1571 /**
1572  * nsPush:
1573  * @ctxt:  an XML parser context
1574  * @prefix:  the namespace prefix or NULL
1575  * @URL:  the namespace name
1576  *
1577  * Pushes a new parser namespace on top of the ns stack
1578  *
1579  * Returns -1 in case of error, -2 if the namespace should be discarded
1580  *	   and the index in the stack otherwise.
1581  */
1582 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1583 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1584 {
1585     if (ctxt->options & XML_PARSE_NSCLEAN) {
1586         int i;
1587 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1588 	    if (ctxt->nsTab[i] == prefix) {
1589 		/* in scope */
1590 	        if (ctxt->nsTab[i + 1] == URL)
1591 		    return(-2);
1592 		/* out of scope keep it */
1593 		break;
1594 	    }
1595 	}
1596     }
1597     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1598 	ctxt->nsMax = 10;
1599 	ctxt->nsNr = 0;
1600 	ctxt->nsTab = (const xmlChar **)
1601 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1602 	if (ctxt->nsTab == NULL) {
1603 	    xmlErrMemory(ctxt, NULL);
1604 	    ctxt->nsMax = 0;
1605             return (-1);
1606 	}
1607     } else if (ctxt->nsNr >= ctxt->nsMax) {
1608         const xmlChar ** tmp;
1609         ctxt->nsMax *= 2;
1610         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1611 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1612         if (tmp == NULL) {
1613             xmlErrMemory(ctxt, NULL);
1614 	    ctxt->nsMax /= 2;
1615             return (-1);
1616         }
1617 	ctxt->nsTab = tmp;
1618     }
1619     ctxt->nsTab[ctxt->nsNr++] = prefix;
1620     ctxt->nsTab[ctxt->nsNr++] = URL;
1621     return (ctxt->nsNr);
1622 }
1623 /**
1624  * nsPop:
1625  * @ctxt: an XML parser context
1626  * @nr:  the number to pop
1627  *
1628  * Pops the top @nr parser prefix/namespace from the ns stack
1629  *
1630  * Returns the number of namespaces removed
1631  */
1632 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1633 nsPop(xmlParserCtxtPtr ctxt, int nr)
1634 {
1635     int i;
1636 
1637     if (ctxt->nsTab == NULL) return(0);
1638     if (ctxt->nsNr < nr) {
1639         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1640         nr = ctxt->nsNr;
1641     }
1642     if (ctxt->nsNr <= 0)
1643         return (0);
1644 
1645     for (i = 0;i < nr;i++) {
1646          ctxt->nsNr--;
1647 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1648     }
1649     return(nr);
1650 }
1651 #endif
1652 
1653 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1654 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1655     const xmlChar **atts;
1656     int *attallocs;
1657     int maxatts;
1658 
1659     if (ctxt->atts == NULL) {
1660 	maxatts = 55; /* allow for 10 attrs by default */
1661 	atts = (const xmlChar **)
1662 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1663 	if (atts == NULL) goto mem_error;
1664 	ctxt->atts = atts;
1665 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1666 	if (attallocs == NULL) goto mem_error;
1667 	ctxt->attallocs = attallocs;
1668 	ctxt->maxatts = maxatts;
1669     } else if (nr + 5 > ctxt->maxatts) {
1670 	maxatts = (nr + 5) * 2;
1671 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1672 				     maxatts * sizeof(const xmlChar *));
1673 	if (atts == NULL) goto mem_error;
1674 	ctxt->atts = atts;
1675 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1676 	                             (maxatts / 5) * sizeof(int));
1677 	if (attallocs == NULL) goto mem_error;
1678 	ctxt->attallocs = attallocs;
1679 	ctxt->maxatts = maxatts;
1680     }
1681     return(ctxt->maxatts);
1682 mem_error:
1683     xmlErrMemory(ctxt, NULL);
1684     return(-1);
1685 }
1686 
1687 /**
1688  * inputPush:
1689  * @ctxt:  an XML parser context
1690  * @value:  the parser input
1691  *
1692  * Pushes a new parser input on top of the input stack
1693  *
1694  * Returns -1 in case of error, the index in the stack otherwise
1695  */
1696 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1697 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1698 {
1699     if ((ctxt == NULL) || (value == NULL))
1700         return(-1);
1701     if (ctxt->inputNr >= ctxt->inputMax) {
1702         ctxt->inputMax *= 2;
1703         ctxt->inputTab =
1704             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1705                                              ctxt->inputMax *
1706                                              sizeof(ctxt->inputTab[0]));
1707         if (ctxt->inputTab == NULL) {
1708             xmlErrMemory(ctxt, NULL);
1709 	    xmlFreeInputStream(value);
1710 	    ctxt->inputMax /= 2;
1711 	    value = NULL;
1712             return (-1);
1713         }
1714     }
1715     ctxt->inputTab[ctxt->inputNr] = value;
1716     ctxt->input = value;
1717     return (ctxt->inputNr++);
1718 }
1719 /**
1720  * inputPop:
1721  * @ctxt: an XML parser context
1722  *
1723  * Pops the top parser input from the input stack
1724  *
1725  * Returns the input just removed
1726  */
1727 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1728 inputPop(xmlParserCtxtPtr ctxt)
1729 {
1730     xmlParserInputPtr ret;
1731 
1732     if (ctxt == NULL)
1733         return(NULL);
1734     if (ctxt->inputNr <= 0)
1735         return (NULL);
1736     ctxt->inputNr--;
1737     if (ctxt->inputNr > 0)
1738         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1739     else
1740         ctxt->input = NULL;
1741     ret = ctxt->inputTab[ctxt->inputNr];
1742     ctxt->inputTab[ctxt->inputNr] = NULL;
1743     return (ret);
1744 }
1745 /**
1746  * nodePush:
1747  * @ctxt:  an XML parser context
1748  * @value:  the element node
1749  *
1750  * Pushes a new element node on top of the node stack
1751  *
1752  * Returns -1 in case of error, the index in the stack otherwise
1753  */
1754 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1755 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1756 {
1757     if (ctxt == NULL) return(0);
1758     if (ctxt->nodeNr >= ctxt->nodeMax) {
1759         xmlNodePtr *tmp;
1760 
1761 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1762                                       ctxt->nodeMax * 2 *
1763                                       sizeof(ctxt->nodeTab[0]));
1764         if (tmp == NULL) {
1765             xmlErrMemory(ctxt, NULL);
1766             return (-1);
1767         }
1768         ctxt->nodeTab = tmp;
1769 	ctxt->nodeMax *= 2;
1770     }
1771     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1772         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1773 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1774 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1775 			  xmlParserMaxDepth);
1776 	xmlHaltParser(ctxt);
1777 	return(-1);
1778     }
1779     ctxt->nodeTab[ctxt->nodeNr] = value;
1780     ctxt->node = value;
1781     return (ctxt->nodeNr++);
1782 }
1783 
1784 /**
1785  * nodePop:
1786  * @ctxt: an XML parser context
1787  *
1788  * Pops the top element node from the node stack
1789  *
1790  * Returns the node just removed
1791  */
1792 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1793 nodePop(xmlParserCtxtPtr ctxt)
1794 {
1795     xmlNodePtr ret;
1796 
1797     if (ctxt == NULL) return(NULL);
1798     if (ctxt->nodeNr <= 0)
1799         return (NULL);
1800     ctxt->nodeNr--;
1801     if (ctxt->nodeNr > 0)
1802         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1803     else
1804         ctxt->node = NULL;
1805     ret = ctxt->nodeTab[ctxt->nodeNr];
1806     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1807     return (ret);
1808 }
1809 
1810 #ifdef LIBXML_PUSH_ENABLED
1811 /**
1812  * nameNsPush:
1813  * @ctxt:  an XML parser context
1814  * @value:  the element name
1815  * @prefix:  the element prefix
1816  * @URI:  the element namespace name
1817  *
1818  * Pushes a new element name/prefix/URL on top of the name stack
1819  *
1820  * Returns -1 in case of error, the index in the stack otherwise
1821  */
1822 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1823 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1824            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1825 {
1826     if (ctxt->nameNr >= ctxt->nameMax) {
1827         const xmlChar * *tmp;
1828         void **tmp2;
1829         ctxt->nameMax *= 2;
1830         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1831                                     ctxt->nameMax *
1832                                     sizeof(ctxt->nameTab[0]));
1833         if (tmp == NULL) {
1834 	    ctxt->nameMax /= 2;
1835 	    goto mem_error;
1836         }
1837 	ctxt->nameTab = tmp;
1838         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1839                                     ctxt->nameMax * 3 *
1840                                     sizeof(ctxt->pushTab[0]));
1841         if (tmp2 == NULL) {
1842 	    ctxt->nameMax /= 2;
1843 	    goto mem_error;
1844         }
1845 	ctxt->pushTab = tmp2;
1846     }
1847     ctxt->nameTab[ctxt->nameNr] = value;
1848     ctxt->name = value;
1849     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1850     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1851     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1852     return (ctxt->nameNr++);
1853 mem_error:
1854     xmlErrMemory(ctxt, NULL);
1855     return (-1);
1856 }
1857 /**
1858  * nameNsPop:
1859  * @ctxt: an XML parser context
1860  *
1861  * Pops the top element/prefix/URI name from the name stack
1862  *
1863  * Returns the name just removed
1864  */
1865 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1866 nameNsPop(xmlParserCtxtPtr ctxt)
1867 {
1868     const xmlChar *ret;
1869 
1870     if (ctxt->nameNr <= 0)
1871         return (NULL);
1872     ctxt->nameNr--;
1873     if (ctxt->nameNr > 0)
1874         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1875     else
1876         ctxt->name = NULL;
1877     ret = ctxt->nameTab[ctxt->nameNr];
1878     ctxt->nameTab[ctxt->nameNr] = NULL;
1879     return (ret);
1880 }
1881 #endif /* LIBXML_PUSH_ENABLED */
1882 
1883 /**
1884  * namePush:
1885  * @ctxt:  an XML parser context
1886  * @value:  the element name
1887  *
1888  * Pushes a new element name on top of the name stack
1889  *
1890  * Returns -1 in case of error, the index in the stack otherwise
1891  */
1892 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1893 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1894 {
1895     if (ctxt == NULL) return (-1);
1896 
1897     if (ctxt->nameNr >= ctxt->nameMax) {
1898         const xmlChar * *tmp;
1899         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1900                                     ctxt->nameMax * 2 *
1901                                     sizeof(ctxt->nameTab[0]));
1902         if (tmp == NULL) {
1903 	    goto mem_error;
1904         }
1905 	ctxt->nameTab = tmp;
1906         ctxt->nameMax *= 2;
1907     }
1908     ctxt->nameTab[ctxt->nameNr] = value;
1909     ctxt->name = value;
1910     return (ctxt->nameNr++);
1911 mem_error:
1912     xmlErrMemory(ctxt, NULL);
1913     return (-1);
1914 }
1915 /**
1916  * namePop:
1917  * @ctxt: an XML parser context
1918  *
1919  * Pops the top element name from the name stack
1920  *
1921  * Returns the name just removed
1922  */
1923 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1924 namePop(xmlParserCtxtPtr ctxt)
1925 {
1926     const xmlChar *ret;
1927 
1928     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1929         return (NULL);
1930     ctxt->nameNr--;
1931     if (ctxt->nameNr > 0)
1932         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1933     else
1934         ctxt->name = NULL;
1935     ret = ctxt->nameTab[ctxt->nameNr];
1936     ctxt->nameTab[ctxt->nameNr] = NULL;
1937     return (ret);
1938 }
1939 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Pushes @val on top of the ctxt->spaceTab stack, doubling the storage
 * when it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 on allocation failure (the recorded capacity is restored),
 * the index of the new entry otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the capacity change, the old table is still in use */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1958 
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the ctxt->spaceTab stack. ctxt->space is
 * re-pointed at the new top entry, or at slot 0 when the stack becomes
 * empty, and the vacated slot is overwritten with -1.
 *
 * Returns the removed value, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1971 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skips n xmlChars, and must also be used only to skip ASCII
 *           defined strings without newlines within the parser.
 *   NEXT1   Skips 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skips to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skips the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copies the current unicode char to the target buffer,
 *            incrementing the index
 *   GROW, SHRINK  handling of input buffers
 */
2006 
/*
 * Direct access to the bytes at the current position of ctxt->input.
 * All of these expect a variable named ctxt to be in scope.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s, read as
 * unsigned char, are equal to c1..cn in that order.
 * Every argument is parenthesized in the expansion so that any pointer
 * expression (e.g. a conditional expression) can be passed as s.
 * Note that s may be evaluated up to n times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance the cursor, the column and the character counter by
 * val bytes (val is evaluated three times), then process a '%' found at
 * the new position and pop the input if it is exhausted and cannot grow.
 * No line accounting is done, see SKIPL for that.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one byte at a time so that line and
 * column numbers stay correct across '\n'. val is re-evaluated on every
 * iteration of the loop.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2052 
2053 #define SHRINK if ((ctxt->progressive == 0) &&				\
2054 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2055 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2056 	xmlSHRINK (ctxt);
2057 
xmlSHRINK(xmlParserCtxtPtr ctxt)2058 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2059     xmlParserInputShrink(ctxt->input);
2060     if ((*ctxt->input->cur == 0) &&
2061         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2062 	    xmlPopInput(ctxt);
2063   }
2064 
2065 #define GROW if ((ctxt->progressive == 0) &&				\
2066 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2067 	xmlGROW (ctxt);
2068 
xmlGROW(xmlParserCtxtPtr ctxt)2069 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2070     unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2071     unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2072 
2073     if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2074          (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2075          ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2076         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2077         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2078         xmlHaltParser(ctxt);
2079 	return;
2080     }
2081     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2082     if ((ctxt->input->cur > ctxt->input->end) ||
2083         (ctxt->input->cur < ctxt->input->base)) {
2084         xmlHaltParser(ctxt);
2085         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2086 	return;
2087     }
2088     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2089         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2090 	    xmlPopInput(ctxt);
2091 }
2092 
/* Shorthands for the character-level helpers; all expect ctxt in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping column and character count,
 * and try to grow the input when the new current byte is 0.
 * No line accounting and no parameter-entity handling is done.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which occupies l bytes,
 * keeping line/column up to date, then process a '%' found at the new
 * position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is handed to the decoding routine. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i. When l is 1 the value is stored as a single xmlChar, otherwise
 * xmlCopyCharMultiByte() does the encoding and returns the byte count.
 * Arguments are parenthesized so that compound expressions expand as
 * intended. The expansion is an if/else without a trailing ';', so it must
 * be used as a full statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2119 
2120 /**
2121  * xmlSkipBlankChars:
2122  * @ctxt:  the XML parser context
2123  *
2124  * skip all blanks character found at that point in the input streams.
2125  * It pops up finished entities in the process if allowable at that point.
2126  *
2127  * Returns the number of space chars skipped
2128  */
2129 
2130 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2131 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2132     int res = 0;
2133 
2134     /*
2135      * It's Okay to use CUR/NEXT here since all the blanks are on
2136      * the ASCII range.
2137      */
2138     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2139 	const xmlChar *cur;
2140 	/*
2141 	 * if we are in the document content, go really fast
2142 	 */
2143 	cur = ctxt->input->cur;
2144 	while (IS_BLANK_CH(*cur)) {
2145 	    if (*cur == '\n') {
2146 		ctxt->input->line++; ctxt->input->col = 1;
2147 	    } else {
2148 		ctxt->input->col++;
2149 	    }
2150 	    cur++;
2151 	    res++;
2152 	    if (*cur == 0) {
2153 		ctxt->input->cur = cur;
2154 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2155 		cur = ctxt->input->cur;
2156 	    }
2157 	}
2158 	ctxt->input->cur = cur;
2159     } else {
2160 	int cur;
2161 	do {
2162 	    cur = CUR;
2163 	    while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2164 	           (ctxt->instate != XML_PARSER_EOF))) {
2165 		NEXT;
2166 		cur = CUR;
2167 		res++;
2168 	    }
2169 	    while ((cur == 0) && (ctxt->inputNr > 1) &&
2170 		   (ctxt->instate != XML_PARSER_COMMENT)) {
2171 		xmlPopInput(ctxt);
2172 		cur = CUR;
2173 	    }
2174 	    /*
2175 	     * Need to handle support of entities branching here
2176 	     */
2177 	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2178 	} while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2179 	         (ctxt->instate != XML_PARSER_EOF));
2180     }
2181     return(res);
2182 }
2183 
2184 /************************************************************************
2185  *									*
2186  *		Commodity functions to handle entities			*
2187  *									*
2188  ************************************************************************/
2189 
2190 /**
2191  * xmlPopInput:
2192  * @ctxt:  an XML parser context
2193  *
2194  * xmlPopInput: the current input pointed by ctxt->input came to an end
2195  *          pop it and return the next char.
2196  *
2197  * Returns the current xmlChar in the parser context
2198  */
2199 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2200 xmlPopInput(xmlParserCtxtPtr ctxt) {
2201     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2202     if (xmlParserDebugEntities)
2203 	xmlGenericError(xmlGenericErrorContext,
2204 		"Popping input %d\n", ctxt->inputNr);
2205     xmlFreeInputStream(inputPop(ctxt));
2206     if ((*ctxt->input->cur == 0) &&
2207         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2208 	    return(xmlPopInput(ctxt));
2209     return(CUR);
2210 }
2211 
2212 /**
2213  * xmlPushInput:
2214  * @ctxt:  an XML parser context
2215  * @input:  an XML parser input fragment (entity, XML fragment ...).
2216  *
2217  * xmlPushInput: switch to a new input stream which is stacked on top
2218  *               of the previous one(s).
2219  * Returns -1 in case of error or the index in the input stack
2220  */
2221 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2222 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2223     int ret;
2224     if (input == NULL) return(-1);
2225 
2226     if (xmlParserDebugEntities) {
2227 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2228 	    xmlGenericError(xmlGenericErrorContext,
2229 		    "%s(%d): ", ctxt->input->filename,
2230 		    ctxt->input->line);
2231 	xmlGenericError(xmlGenericErrorContext,
2232 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2233     }
2234     ret = inputPush(ctxt, input);
2235     if (ctxt->instate == XML_PARSER_EOF)
2236         return(-1);
2237     GROW;
2238     return(ret);
2239 }
2240 
2241 /**
2242  * xmlParseCharRef:
2243  * @ctxt:  an XML parser context
2244  *
2245  * parse Reference declarations
2246  *
2247  * [66] CharRef ::= '&#' [0-9]+ ';' |
2248  *                  '&#x' [0-9a-fA-F]+ ';'
2249  *
2250  * [ WFC: Legal Character ]
2251  * Characters referred to using character references must match the
2252  * production for Char.
2253  *
2254  * Returns the value parsed (as an int), 0 in case of error
2255  */
2256 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2257 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2258     unsigned int val = 0;
2259     int count = 0;
2260     unsigned int outofrange = 0;
2261 
2262     /*
2263      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2264      */
2265     if ((RAW == '&') && (NXT(1) == '#') &&
2266         (NXT(2) == 'x')) {
2267 	SKIP(3);
2268 	GROW;
2269 	while (RAW != ';') { /* loop blocked by count */
2270 	    if (count++ > 20) {
2271 		count = 0;
2272 		GROW;
2273                 if (ctxt->instate == XML_PARSER_EOF)
2274                     return(0);
2275 	    }
2276 	    if ((RAW >= '0') && (RAW <= '9'))
2277 	        val = val * 16 + (CUR - '0');
2278 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2279 	        val = val * 16 + (CUR - 'a') + 10;
2280 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2281 	        val = val * 16 + (CUR - 'A') + 10;
2282 	    else {
2283 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2284 		val = 0;
2285 		break;
2286 	    }
2287 	    if (val > 0x10FFFF)
2288 	        outofrange = val;
2289 
2290 	    NEXT;
2291 	    count++;
2292 	}
2293 	if (RAW == ';') {
2294 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2295 	    ctxt->input->col++;
2296 	    ctxt->nbChars ++;
2297 	    ctxt->input->cur++;
2298 	}
2299     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2300 	SKIP(2);
2301 	GROW;
2302 	while (RAW != ';') { /* loop blocked by count */
2303 	    if (count++ > 20) {
2304 		count = 0;
2305 		GROW;
2306                 if (ctxt->instate == XML_PARSER_EOF)
2307                     return(0);
2308 	    }
2309 	    if ((RAW >= '0') && (RAW <= '9'))
2310 	        val = val * 10 + (CUR - '0');
2311 	    else {
2312 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2313 		val = 0;
2314 		break;
2315 	    }
2316 	    if (val > 0x10FFFF)
2317 	        outofrange = val;
2318 
2319 	    NEXT;
2320 	    count++;
2321 	}
2322 	if (RAW == ';') {
2323 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2324 	    ctxt->input->col++;
2325 	    ctxt->nbChars ++;
2326 	    ctxt->input->cur++;
2327 	}
2328     } else {
2329         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2330     }
2331 
2332     /*
2333      * [ WFC: Legal Character ]
2334      * Characters referred to using character references must match the
2335      * production for Char.
2336      */
2337     if ((IS_CHAR(val) && (outofrange == 0))) {
2338         return(val);
2339     } else {
2340         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2341                           "xmlParseCharRef: invalid xmlChar value %d\n",
2342 	                  val);
2343     }
2344     return(0);
2345 }
2346 
2347 /**
2348  * xmlParseStringCharRef:
2349  * @ctxt:  an XML parser context
2350  * @str:  a pointer to an index in the string
2351  *
2352  * parse Reference declarations, variant parsing from a string rather
2353  * than an an input flow.
2354  *
2355  * [66] CharRef ::= '&#' [0-9]+ ';' |
2356  *                  '&#x' [0-9a-fA-F]+ ';'
2357  *
2358  * [ WFC: Legal Character ]
2359  * Characters referred to using character references must match the
2360  * production for Char.
2361  *
2362  * Returns the value parsed (as an int), 0 in case of error, str will be
2363  *         updated to the current value of the index
2364  */
2365 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2366 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2367     const xmlChar *ptr;
2368     xmlChar cur;
2369     unsigned int val = 0;
2370     unsigned int outofrange = 0;
2371 
2372     if ((str == NULL) || (*str == NULL)) return(0);
2373     ptr = *str;
2374     cur = *ptr;
2375     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2376 	ptr += 3;
2377 	cur = *ptr;
2378 	while (cur != ';') { /* Non input consuming loop */
2379 	    if ((cur >= '0') && (cur <= '9'))
2380 	        val = val * 16 + (cur - '0');
2381 	    else if ((cur >= 'a') && (cur <= 'f'))
2382 	        val = val * 16 + (cur - 'a') + 10;
2383 	    else if ((cur >= 'A') && (cur <= 'F'))
2384 	        val = val * 16 + (cur - 'A') + 10;
2385 	    else {
2386 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2387 		val = 0;
2388 		break;
2389 	    }
2390 	    if (val > 0x10FFFF)
2391 	        outofrange = val;
2392 
2393 	    ptr++;
2394 	    cur = *ptr;
2395 	}
2396 	if (cur == ';')
2397 	    ptr++;
2398     } else if  ((cur == '&') && (ptr[1] == '#')){
2399 	ptr += 2;
2400 	cur = *ptr;
2401 	while (cur != ';') { /* Non input consuming loops */
2402 	    if ((cur >= '0') && (cur <= '9'))
2403 	        val = val * 10 + (cur - '0');
2404 	    else {
2405 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2406 		val = 0;
2407 		break;
2408 	    }
2409 	    if (val > 0x10FFFF)
2410 	        outofrange = val;
2411 
2412 	    ptr++;
2413 	    cur = *ptr;
2414 	}
2415 	if (cur == ';')
2416 	    ptr++;
2417     } else {
2418 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2419 	return(0);
2420     }
2421     *str = ptr;
2422 
2423     /*
2424      * [ WFC: Legal Character ]
2425      * Characters referred to using character references must match the
2426      * production for Char.
2427      */
2428     if ((IS_CHAR(val) && (outofrange == 0))) {
2429         return(val);
2430     } else {
2431         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2432 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2433 			  val);
2434     }
2435     return(0);
2436 }
2437 
2438 /**
2439  * xmlNewBlanksWrapperInputStream:
2440  * @ctxt:  an XML parser context
2441  * @entity:  an Entity pointer
2442  *
2443  * Create a new input stream for wrapping
2444  * blanks around a PEReference
2445  *
2446  * Returns the new input stream or NULL
2447  */
2448 
deallocblankswrapper(xmlChar * str)2449 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2450 
2451 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2452 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2453     xmlParserInputPtr input;
2454     xmlChar *buffer;
2455     size_t length;
2456     if (entity == NULL) {
2457 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2458 	            "xmlNewBlanksWrapperInputStream entity\n");
2459 	return(NULL);
2460     }
2461     if (xmlParserDebugEntities)
2462 	xmlGenericError(xmlGenericErrorContext,
2463 		"new blanks wrapper for entity: %s\n", entity->name);
2464     input = xmlNewInputStream(ctxt);
2465     if (input == NULL) {
2466 	return(NULL);
2467     }
2468     length = xmlStrlen(entity->name) + 5;
2469     buffer = xmlMallocAtomic(length);
2470     if (buffer == NULL) {
2471 	xmlErrMemory(ctxt, NULL);
2472         xmlFree(input);
2473 	return(NULL);
2474     }
2475     buffer [0] = ' ';
2476     buffer [1] = '%';
2477     buffer [length-3] = ';';
2478     buffer [length-2] = ' ';
2479     buffer [length-1] = 0;
2480     memcpy(buffer + 2, entity->name, length - 5);
2481     input->free = deallocblankswrapper;
2482     input->base = buffer;
2483     input->cur = buffer;
2484     input->length = length;
2485     input->end = &buffer[length];
2486     return(input);
2487 }
2488 
2489 /**
2490  * xmlParserHandlePEReference:
2491  * @ctxt:  the parser context
2492  *
2493  * [69] PEReference ::= '%' Name ';'
2494  *
2495  * [ WFC: No Recursion ]
2496  * A parsed entity must not contain a recursive
2497  * reference to itself, either directly or indirectly.
2498  *
2499  * [ WFC: Entity Declared ]
2500  * In a document without any DTD, a document with only an internal DTD
2501  * subset which contains no parameter entity references, or a document
2502  * with "standalone='yes'", ...  ... The declaration of a parameter
2503  * entity must precede any reference to it...
2504  *
2505  * [ VC: Entity Declared ]
2506  * In a document with an external subset or external parameter entities
2507  * with "standalone='no'", ...  ... The declaration of a parameter entity
2508  * must precede any reference to it...
2509  *
2510  * [ WFC: In DTD ]
2511  * Parameter-entity references may only appear in the DTD.
2512  * NOTE: misleading but this is handled.
2513  *
2514  * A PEReference may have been detected in the current input stream
2515  * the handling is done accordingly to
2516  *      http://www.w3.org/TR/REC-xml#entproc
2517  * i.e.
2518  *   - Included in literal in entity values
2519  *   - Included as Parameter Entity reference within DTDs
2520  */
2521 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2522 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2523     const xmlChar *name;
2524     xmlEntityPtr entity = NULL;
2525     xmlParserInputPtr input;
2526 
2527     if (RAW != '%') return;
2528     switch(ctxt->instate) {
2529 	case XML_PARSER_CDATA_SECTION:
2530 	    return;
2531         case XML_PARSER_COMMENT:
2532 	    return;
2533 	case XML_PARSER_START_TAG:
2534 	    return;
2535 	case XML_PARSER_END_TAG:
2536 	    return;
2537         case XML_PARSER_EOF:
2538 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2539 	    return;
2540         case XML_PARSER_PROLOG:
2541 	case XML_PARSER_START:
2542 	case XML_PARSER_MISC:
2543 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2544 	    return;
2545 	case XML_PARSER_ENTITY_DECL:
2546         case XML_PARSER_CONTENT:
2547         case XML_PARSER_ATTRIBUTE_VALUE:
2548         case XML_PARSER_PI:
2549 	case XML_PARSER_SYSTEM_LITERAL:
2550 	case XML_PARSER_PUBLIC_LITERAL:
2551 	    /* we just ignore it there */
2552 	    return;
2553         case XML_PARSER_EPILOG:
2554 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2555 	    return;
2556 	case XML_PARSER_ENTITY_VALUE:
2557 	    /*
2558 	     * NOTE: in the case of entity values, we don't do the
2559 	     *       substitution here since we need the literal
2560 	     *       entity value to be able to save the internal
2561 	     *       subset of the document.
2562 	     *       This will be handled by xmlStringDecodeEntities
2563 	     */
2564 	    return;
2565         case XML_PARSER_DTD:
2566 	    /*
2567 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2568 	     * In the internal DTD subset, parameter-entity references
2569 	     * can occur only where markup declarations can occur, not
2570 	     * within markup declarations.
2571 	     * In that case this is handled in xmlParseMarkupDecl
2572 	     */
2573 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2574 		return;
2575 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2576 		return;
2577             break;
2578         case XML_PARSER_IGNORE:
2579             return;
2580     }
2581 
2582     NEXT;
2583     name = xmlParseName(ctxt);
2584     if (xmlParserDebugEntities)
2585 	xmlGenericError(xmlGenericErrorContext,
2586 		"PEReference: %s\n", name);
2587     if (name == NULL) {
2588 	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2589     } else {
2590 	if (RAW == ';') {
2591 	    NEXT;
2592 	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2593 		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2594 	    if (ctxt->instate == XML_PARSER_EOF)
2595 	        return;
2596 	    if (entity == NULL) {
2597 
2598 		/*
2599 		 * [ WFC: Entity Declared ]
2600 		 * In a document without any DTD, a document with only an
2601 		 * internal DTD subset which contains no parameter entity
2602 		 * references, or a document with "standalone='yes'", ...
2603 		 * ... The declaration of a parameter entity must precede
2604 		 * any reference to it...
2605 		 */
2606 		if ((ctxt->standalone == 1) ||
2607 		    ((ctxt->hasExternalSubset == 0) &&
2608 		     (ctxt->hasPErefs == 0))) {
2609 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2610 			 "PEReference: %%%s; not found\n", name);
2611 	        } else {
2612 		    /*
2613 		     * [ VC: Entity Declared ]
2614 		     * In a document with an external subset or external
2615 		     * parameter entities with "standalone='no'", ...
2616 		     * ... The declaration of a parameter entity must precede
2617 		     * any reference to it...
2618 		     */
2619 		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2620 		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2621 			                 "PEReference: %%%s; not found\n",
2622 				         name, NULL);
2623 		    } else
2624 		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2625 			              "PEReference: %%%s; not found\n",
2626 				      name, NULL);
2627 		    ctxt->valid = 0;
2628 		}
2629 		xmlParserEntityCheck(ctxt, 0, NULL, 0);
2630 	    } else if (ctxt->input->free != deallocblankswrapper) {
2631 		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2632 		    if (xmlPushInput(ctxt, input) < 0)
2633 		        return;
2634 	    } else {
2635 	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2636 		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2637 		    xmlChar start[4];
2638 		    xmlCharEncoding enc;
2639 
2640 		    /*
2641 		     * Note: external parameter entities will not be loaded, it
2642 		     * is not required for a non-validating parser, unless the
2643 		     * option of validating, or substituting entities were
2644 		     * given. Doing so is far more secure as the parser will
2645 		     * only process data coming from the document entity by
2646 		     * default.
2647 		     */
2648                     if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2649 		        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2650 			((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2651 			((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2652 			((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2653 			(ctxt->replaceEntities == 0) &&
2654 			(ctxt->validate == 0))
2655 			return;
2656 
2657 		    /*
2658 		     * handle the extra spaces added before and after
2659 		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2660 		     * this is done independently.
2661 		     */
2662 		    input = xmlNewEntityInputStream(ctxt, entity);
2663 		    if (xmlPushInput(ctxt, input) < 0)
2664 		        return;
2665 
2666 		    /*
2667 		     * Get the 4 first bytes and decode the charset
2668 		     * if enc != XML_CHAR_ENCODING_NONE
2669 		     * plug some encoding conversion routines.
2670 		     * Note that, since we may have some non-UTF8
2671 		     * encoding (like UTF16, bug 135229), the 'length'
2672 		     * is not known, but we can calculate based upon
2673 		     * the amount of data in the buffer.
2674 		     */
2675 		    GROW
2676                     if (ctxt->instate == XML_PARSER_EOF)
2677                         return;
2678 		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2679 			start[0] = RAW;
2680 			start[1] = NXT(1);
2681 			start[2] = NXT(2);
2682 			start[3] = NXT(3);
2683 			enc = xmlDetectCharEncoding(start, 4);
2684 			if (enc != XML_CHAR_ENCODING_NONE) {
2685 			    xmlSwitchEncoding(ctxt, enc);
2686 			}
2687 		    }
2688 
2689 		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2690 			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2691 			(IS_BLANK_CH(NXT(5)))) {
2692 			xmlParseTextDecl(ctxt);
2693 		    }
2694 		} else {
2695 		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2696 			     "PEReference: %s is not a parameter entity\n",
2697 				      name);
2698 		}
2699 	    }
2700 	} else {
2701 	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2702 	}
2703     }
2704 }
2705 
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n; n is parenthesized in
 * the expansion so that any integer expression can be passed.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is also
 * the target when the new size wraps around below the current size.
 * On failure buffer and buffer##_size are left unchanged.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2720 
2721 /**
2722  * xmlStringLenDecodeEntities:
2723  * @ctxt:  the parser context
2724  * @str:  the input string
2725  * @len: the string length
2726  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2727  * @end:  an end marker xmlChar, 0 if none
2728  * @end2:  an end marker xmlChar, 0 if none
2729  * @end3:  an end marker xmlChar, 0 if none
2730  *
2731  * Takes a entity string content and process to do the adequate substitutions.
2732  *
2733  * [67] Reference ::= EntityRef | CharRef
2734  *
2735  * [69] PEReference ::= '%' Name ';'
2736  *
2737  * Returns A newly allocated string with the substitution done. The caller
2738  *      must deallocate it !
2739  */
2740 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2741 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2742 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2743     xmlChar *buffer = NULL;
2744     size_t buffer_size = 0;
2745     size_t nbchars = 0;
2746 
2747     xmlChar *current = NULL;
2748     xmlChar *rep = NULL;
2749     const xmlChar *last;
2750     xmlEntityPtr ent;
2751     int c,l;
2752 
2753     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2754 	return(NULL);
2755     last = str + len;
2756 
2757     if (((ctxt->depth > 40) &&
2758          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2759 	(ctxt->depth > 1024)) {
2760 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2761 	return(NULL);
2762     }
2763 
2764     /*
2765      * allocate a translation buffer.
2766      */
2767     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2768     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2769     if (buffer == NULL) goto mem_error;
2770 
2771     /*
2772      * OK loop until we reach one of the ending char or a size limit.
2773      * we are operating on already parsed values.
2774      */
2775     if (str < last)
2776 	c = CUR_SCHAR(str, l);
2777     else
2778         c = 0;
2779     while ((c != 0) && (c != end) && /* non input consuming loop */
2780 	   (c != end2) && (c != end3)) {
2781 
2782 	if (c == 0) break;
2783         if ((c == '&') && (str[1] == '#')) {
2784 	    int val = xmlParseStringCharRef(ctxt, &str);
2785 	    if (val != 0) {
2786 		COPY_BUF(0,buffer,nbchars,val);
2787 	    }
2788 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2789 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790 	    }
2791 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2792 	    if (xmlParserDebugEntities)
2793 		xmlGenericError(xmlGenericErrorContext,
2794 			"String decoding Entity Reference: %.30s\n",
2795 			str);
2796 	    ent = xmlParseStringEntityRef(ctxt, &str);
2797 	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2798 	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2799 	        goto int_error;
2800 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2801 	    if (ent != NULL)
2802 	        ctxt->nbentities += ent->checked / 2;
2803 	    if ((ent != NULL) &&
2804 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2805 		if (ent->content != NULL) {
2806 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2807 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2808 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2809 		    }
2810 		} else {
2811 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2812 			    "predefined entity has no content\n");
2813 		}
2814 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2815 		ctxt->depth++;
2816 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2817 			                      0, 0, 0);
2818 		ctxt->depth--;
2819 
2820 		if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2821 		    (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2822 		    goto int_error;
2823 
2824 		if (rep != NULL) {
2825 		    current = rep;
2826 		    while (*current != 0) { /* non input consuming loop */
2827 			buffer[nbchars++] = *current++;
2828 			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2829 			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2830 				goto int_error;
2831 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2832 			}
2833 		    }
2834 		    xmlFree(rep);
2835 		    rep = NULL;
2836 		}
2837 	    } else if (ent != NULL) {
2838 		int i = xmlStrlen(ent->name);
2839 		const xmlChar *cur = ent->name;
2840 
2841 		buffer[nbchars++] = '&';
2842 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2843 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2844 		}
2845 		for (;i > 0;i--)
2846 		    buffer[nbchars++] = *cur++;
2847 		buffer[nbchars++] = ';';
2848 	    }
2849 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2850 	    if (xmlParserDebugEntities)
2851 		xmlGenericError(xmlGenericErrorContext,
2852 			"String decoding PE Reference: %.30s\n", str);
2853 	    ent = xmlParseStringPEReference(ctxt, &str);
2854 	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2855 	        goto int_error;
2856 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2857 	    if (ent != NULL)
2858 	        ctxt->nbentities += ent->checked / 2;
2859 	    if (ent != NULL) {
2860                 if (ent->content == NULL) {
2861 		    xmlLoadEntityContent(ctxt, ent);
2862 		}
2863 		ctxt->depth++;
2864 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2865 			                      0, 0, 0);
2866 		ctxt->depth--;
2867 		if (rep != NULL) {
2868 		    current = rep;
2869 		    while (*current != 0) { /* non input consuming loop */
2870 			buffer[nbchars++] = *current++;
2871 			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2872 			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2873 			        goto int_error;
2874 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2875 			}
2876 		    }
2877 		    xmlFree(rep);
2878 		    rep = NULL;
2879 		}
2880 	    }
2881 	} else {
2882 	    COPY_BUF(l,buffer,nbchars,c);
2883 	    str += l;
2884 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2885 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2886 	    }
2887 	}
2888 	if (str < last)
2889 	    c = CUR_SCHAR(str, l);
2890 	else
2891 	    c = 0;
2892     }
2893     buffer[nbchars] = 0;
2894     return(buffer);
2895 
2896 mem_error:
2897     xmlErrMemory(ctxt, NULL);
2898 int_error:
2899     if (rep != NULL)
2900         xmlFree(rep);
2901     if (buffer != NULL)
2902         xmlFree(buffer);
2903     return(NULL);
2904 }
2905 
2906 /**
2907  * xmlStringDecodeEntities:
2908  * @ctxt:  the parser context
2909  * @str:  the input string
2910  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2911  * @end:  an end marker xmlChar, 0 if none
2912  * @end2:  an end marker xmlChar, 0 if none
2913  * @end3:  an end marker xmlChar, 0 if none
2914  *
2915  * Takes a entity string content and process to do the adequate substitutions.
2916  *
2917  * [67] Reference ::= EntityRef | CharRef
2918  *
2919  * [69] PEReference ::= '%' Name ';'
2920  *
2921  * Returns A newly allocated string with the substitution done. The caller
2922  *      must deallocate it !
2923  */
2924 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2925 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2926 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2927     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2928     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2929            end, end2, end3));
2930 }
2931 
2932 /************************************************************************
2933  *									*
2934  *		Commodity functions, cleanup needed ?			*
2935  *									*
2936  ************************************************************************/
2937 
2938 /**
2939  * areBlanks:
2940  * @ctxt:  an XML parser context
2941  * @str:  a xmlChar *
2942  * @len:  the size of @str
2943  * @blank_chars: we know the chars are blanks
2944  *
2945  * Is this a sequence of blank chars that one can ignore ?
2946  *
2947  * Returns 1 if ignorable 0 otherwise.
2948  */
2949 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2950 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2951                      int blank_chars) {
2952     int i, ret;
2953     xmlNodePtr lastChild;
2954 
2955     /*
2956      * Don't spend time trying to differentiate them, the same callback is
2957      * used !
2958      */
2959     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2960 	return(0);
2961 
2962     /*
2963      * Check for xml:space value.
2964      */
2965     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2966         (*(ctxt->space) == -2))
2967 	return(0);
2968 
2969     /*
2970      * Check that the string is made of blanks
2971      */
2972     if (blank_chars == 0) {
2973 	for (i = 0;i < len;i++)
2974 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2975     }
2976 
2977     /*
2978      * Look if the element is mixed content in the DTD if available
2979      */
2980     if (ctxt->node == NULL) return(0);
2981     if (ctxt->myDoc != NULL) {
2982 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2983         if (ret == 0) return(1);
2984         if (ret == 1) return(0);
2985     }
2986 
2987     /*
2988      * Otherwise, heuristic :-\
2989      */
2990     if ((RAW != '<') && (RAW != 0xD)) return(0);
2991     if ((ctxt->node->children == NULL) &&
2992 	(RAW == '<') && (NXT(1) == '/')) return(0);
2993 
2994     lastChild = xmlGetLastChild(ctxt->node);
2995     if (lastChild == NULL) {
2996         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2997             (ctxt->node->content != NULL)) return(0);
2998     } else if (xmlNodeIsText(lastChild))
2999         return(0);
3000     else if ((ctxt->node->children != NULL) &&
3001              (xmlNodeIsText(ctxt->node->children)))
3002         return(0);
3003     return(1);
3004 }
3005 
3006 /************************************************************************
3007  *									*
3008  *		Extra stuff for namespace support			*
3009  *	Relates to http://www.w3.org/TR/WD-xml-names			*
3010  *									*
3011  ************************************************************************/
3012 
3013 /**
3014  * xmlSplitQName:
3015  * @ctxt:  an XML parser context
3016  * @name:  an XML parser context
3017  * @prefix:  a xmlChar **
3018  *
3019  * parse an UTF8 encoded XML qualified name string
3020  *
3021  * [NS 5] QName ::= (Prefix ':')? LocalPart
3022  *
3023  * [NS 6] Prefix ::= NCName
3024  *
3025  * [NS 7] LocalPart ::= NCName
3026  *
3027  * Returns the local part, and prefix is updated
3028  *   to get the Prefix if any.
3029  */
3030 
3031 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)3032 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3033     xmlChar buf[XML_MAX_NAMELEN + 5];
3034     xmlChar *buffer = NULL;
3035     int len = 0;
3036     int max = XML_MAX_NAMELEN;
3037     xmlChar *ret = NULL;
3038     const xmlChar *cur = name;
3039     int c;
3040 
3041     if (prefix == NULL) return(NULL);
3042     *prefix = NULL;
3043 
3044     if (cur == NULL) return(NULL);
3045 
3046 #ifndef XML_XML_NAMESPACE
3047     /* xml: prefix is not really a namespace */
3048     if ((cur[0] == 'x') && (cur[1] == 'm') &&
3049         (cur[2] == 'l') && (cur[3] == ':'))
3050 	return(xmlStrdup(name));
3051 #endif
3052 
3053     /* nasty but well=formed */
3054     if (cur[0] == ':')
3055 	return(xmlStrdup(name));
3056 
3057     c = *cur++;
3058     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3059 	buf[len++] = c;
3060 	c = *cur++;
3061     }
3062     if (len >= max) {
3063 	/*
3064 	 * Okay someone managed to make a huge name, so he's ready to pay
3065 	 * for the processing speed.
3066 	 */
3067 	max = len * 2;
3068 
3069 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3070 	if (buffer == NULL) {
3071 	    xmlErrMemory(ctxt, NULL);
3072 	    return(NULL);
3073 	}
3074 	memcpy(buffer, buf, len);
3075 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3076 	    if (len + 10 > max) {
3077 	        xmlChar *tmp;
3078 
3079 		max *= 2;
3080 		tmp = (xmlChar *) xmlRealloc(buffer,
3081 						max * sizeof(xmlChar));
3082 		if (tmp == NULL) {
3083 		    xmlFree(buffer);
3084 		    xmlErrMemory(ctxt, NULL);
3085 		    return(NULL);
3086 		}
3087 		buffer = tmp;
3088 	    }
3089 	    buffer[len++] = c;
3090 	    c = *cur++;
3091 	}
3092 	buffer[len] = 0;
3093     }
3094 
3095     if ((c == ':') && (*cur == 0)) {
3096         if (buffer != NULL)
3097 	    xmlFree(buffer);
3098 	*prefix = NULL;
3099 	return(xmlStrdup(name));
3100     }
3101 
3102     if (buffer == NULL)
3103 	ret = xmlStrndup(buf, len);
3104     else {
3105 	ret = buffer;
3106 	buffer = NULL;
3107 	max = XML_MAX_NAMELEN;
3108     }
3109 
3110 
3111     if (c == ':') {
3112 	c = *cur;
3113         *prefix = ret;
3114 	if (c == 0) {
3115 	    return(xmlStrndup(BAD_CAST "", 0));
3116 	}
3117 	len = 0;
3118 
3119 	/*
3120 	 * Check that the first character is proper to start
3121 	 * a new name
3122 	 */
3123 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3124 	      ((c >= 0x41) && (c <= 0x5A)) ||
3125 	      (c == '_') || (c == ':'))) {
3126 	    int l;
3127 	    int first = CUR_SCHAR(cur, l);
3128 
3129 	    if (!IS_LETTER(first) && (first != '_')) {
3130 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3131 			    "Name %s is not XML Namespace compliant\n",
3132 				  name);
3133 	    }
3134 	}
3135 	cur++;
3136 
3137 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3138 	    buf[len++] = c;
3139 	    c = *cur++;
3140 	}
3141 	if (len >= max) {
3142 	    /*
3143 	     * Okay someone managed to make a huge name, so he's ready to pay
3144 	     * for the processing speed.
3145 	     */
3146 	    max = len * 2;
3147 
3148 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3149 	    if (buffer == NULL) {
3150 	        xmlErrMemory(ctxt, NULL);
3151 		return(NULL);
3152 	    }
3153 	    memcpy(buffer, buf, len);
3154 	    while (c != 0) { /* tested bigname2.xml */
3155 		if (len + 10 > max) {
3156 		    xmlChar *tmp;
3157 
3158 		    max *= 2;
3159 		    tmp = (xmlChar *) xmlRealloc(buffer,
3160 						    max * sizeof(xmlChar));
3161 		    if (tmp == NULL) {
3162 			xmlErrMemory(ctxt, NULL);
3163 			xmlFree(buffer);
3164 			return(NULL);
3165 		    }
3166 		    buffer = tmp;
3167 		}
3168 		buffer[len++] = c;
3169 		c = *cur++;
3170 	    }
3171 	    buffer[len] = 0;
3172 	}
3173 
3174 	if (buffer == NULL)
3175 	    ret = xmlStrndup(buf, len);
3176 	else {
3177 	    ret = buffer;
3178 	}
3179     }
3180 
3181     return(ret);
3182 }
3183 
3184 /************************************************************************
3185  *									*
3186  *			The parser itself				*
3187  *	Relates to http://www.w3.org/TR/REC-xml				*
3188  *									*
3189  ************************************************************************/
3190 
3191 /************************************************************************
3192  *									*
3193  *	Routines to parse Name, NCName and NmToken			*
3194  *									*
3195  ************************************************************************/
/*
 * Call counters for the name parsing routines of this file. They only
 * exist when DEBUG is defined, each routine increments its own counter
 * on entry.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3204 
3205 /*
3206  * The two following functions are related to the change of accepted
3207  * characters for Name and NmToken in the Revision 5 of XML-1.0
3208  * They correspond to the modified production [4] and the new production [4a]
3209  * changes in that revision. Also note that the macros used for the
3210  * productions Letter, Digit, CombiningChar and Extender are not needed
3211  * anymore.
3212  * We still keep compatibility to pre-revision5 parsing semantic if the
3213  * new XML_PARSE_OLD10 option is given to the parser.
3214  */
3215 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3216 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3217     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3218         /*
3219 	 * Use the new checks of production [4] [4a] amd [5] of the
3220 	 * Update 5 of XML-1.0
3221 	 */
3222 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3223 	    (((c >= 'a') && (c <= 'z')) ||
3224 	     ((c >= 'A') && (c <= 'Z')) ||
3225 	     (c == '_') || (c == ':') ||
3226 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3227 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3228 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3229 	     ((c >= 0x370) && (c <= 0x37D)) ||
3230 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3231 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3232 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3233 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3234 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3235 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3236 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3237 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3238 	    return(1);
3239     } else {
3240         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3241 	    return(1);
3242     }
3243     return(0);
3244 }
3245 
3246 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3247 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3248     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3249         /*
3250 	 * Use the new checks of production [4] [4a] amd [5] of the
3251 	 * Update 5 of XML-1.0
3252 	 */
3253 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3254 	    (((c >= 'a') && (c <= 'z')) ||
3255 	     ((c >= 'A') && (c <= 'Z')) ||
3256 	     ((c >= '0') && (c <= '9')) || /* !start */
3257 	     (c == '_') || (c == ':') ||
3258 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3259 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3260 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3261 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3262 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3263 	     ((c >= 0x370) && (c <= 0x37D)) ||
3264 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3265 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3266 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3267 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3268 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3269 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3270 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3271 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3272 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3273 	     return(1);
3274     } else {
3275         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3276             (c == '.') || (c == '-') ||
3277 	    (c == '_') || (c == ':') ||
3278 	    (IS_COMBINING(c)) ||
3279 	    (IS_EXTENDER(c)))
3280 	    return(1);
3281     }
3282     return(0);
3283 }
3284 
3285 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3286                                           int *len, int *alloc, int normalize);
3287 
3288 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3289 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3290     int len = 0, l;
3291     int c;
3292     int count = 0;
3293 
3294 #ifdef DEBUG
3295     nbParseNameComplex++;
3296 #endif
3297 
3298     /*
3299      * Handler for more complex cases
3300      */
3301     GROW;
3302     if (ctxt->instate == XML_PARSER_EOF)
3303         return(NULL);
3304     c = CUR_CHAR(l);
3305     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3306         /*
3307 	 * Use the new checks of production [4] [4a] amd [5] of the
3308 	 * Update 5 of XML-1.0
3309 	 */
3310 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3311 	    (!(((c >= 'a') && (c <= 'z')) ||
3312 	       ((c >= 'A') && (c <= 'Z')) ||
3313 	       (c == '_') || (c == ':') ||
3314 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3315 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3316 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3317 	       ((c >= 0x370) && (c <= 0x37D)) ||
3318 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3319 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3320 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3321 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3322 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3323 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3324 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3325 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3326 	    return(NULL);
3327 	}
3328 	len += l;
3329 	NEXTL(l);
3330 	c = CUR_CHAR(l);
3331 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3332 	       (((c >= 'a') && (c <= 'z')) ||
3333 	        ((c >= 'A') && (c <= 'Z')) ||
3334 	        ((c >= '0') && (c <= '9')) || /* !start */
3335 	        (c == '_') || (c == ':') ||
3336 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3337 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3338 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3339 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3340 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3341 	        ((c >= 0x370) && (c <= 0x37D)) ||
3342 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3343 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3344 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3345 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3346 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3347 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3348 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3349 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3350 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3351 		)) {
3352 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3353 		count = 0;
3354 		GROW;
3355                 if (ctxt->instate == XML_PARSER_EOF)
3356                     return(NULL);
3357 	    }
3358 	    len += l;
3359 	    NEXTL(l);
3360 	    c = CUR_CHAR(l);
3361 	}
3362     } else {
3363 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3364 	    (!IS_LETTER(c) && (c != '_') &&
3365 	     (c != ':'))) {
3366 	    return(NULL);
3367 	}
3368 	len += l;
3369 	NEXTL(l);
3370 	c = CUR_CHAR(l);
3371 
3372 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3373 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3374 		(c == '.') || (c == '-') ||
3375 		(c == '_') || (c == ':') ||
3376 		(IS_COMBINING(c)) ||
3377 		(IS_EXTENDER(c)))) {
3378 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3379 		count = 0;
3380 		GROW;
3381                 if (ctxt->instate == XML_PARSER_EOF)
3382                     return(NULL);
3383 	    }
3384 	    len += l;
3385 	    NEXTL(l);
3386 	    c = CUR_CHAR(l);
3387 	    if (c == 0) {
3388 		count = 0;
3389 		GROW;
3390                 if (ctxt->instate == XML_PARSER_EOF)
3391                     return(NULL);
3392 		c = CUR_CHAR(l);
3393 	    }
3394 	}
3395     }
3396     if ((len > XML_MAX_NAME_LENGTH) &&
3397         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3398         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3399         return(NULL);
3400     }
3401     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3402         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3403     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3404 }
3405 
3406 /**
3407  * xmlParseName:
3408  * @ctxt:  an XML parser context
3409  *
3410  * parse an XML name.
3411  *
3412  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3413  *                  CombiningChar | Extender
3414  *
3415  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3416  *
3417  * [6] Names ::= Name (#x20 Name)*
3418  *
3419  * Returns the Name parsed or NULL
3420  */
3421 
3422 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3423 xmlParseName(xmlParserCtxtPtr ctxt) {
3424     const xmlChar *in;
3425     const xmlChar *ret;
3426     int count = 0;
3427 
3428     GROW;
3429 
3430 #ifdef DEBUG
3431     nbParseName++;
3432 #endif
3433 
3434     /*
3435      * Accelerator for simple ASCII names
3436      */
3437     in = ctxt->input->cur;
3438     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3439 	((*in >= 0x41) && (*in <= 0x5A)) ||
3440 	(*in == '_') || (*in == ':')) {
3441 	in++;
3442 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3443 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3444 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3445 	       (*in == '_') || (*in == '-') ||
3446 	       (*in == ':') || (*in == '.'))
3447 	    in++;
3448 	if ((*in > 0) && (*in < 0x80)) {
3449 	    count = in - ctxt->input->cur;
3450             if ((count > XML_MAX_NAME_LENGTH) &&
3451                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3452                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3453                 return(NULL);
3454             }
3455 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3456 	    ctxt->input->cur = in;
3457 	    ctxt->nbChars += count;
3458 	    ctxt->input->col += count;
3459 	    if (ret == NULL)
3460 	        xmlErrMemory(ctxt, NULL);
3461 	    return(ret);
3462 	}
3463     }
3464     /* accelerator for special cases */
3465     return(xmlParseNameComplex(ctxt));
3466 }
3467 
3468 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3469 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3470     int len = 0, l;
3471     int c;
3472     int count = 0;
3473     const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3474 
3475 #ifdef DEBUG
3476     nbParseNCNameComplex++;
3477 #endif
3478 
3479     /*
3480      * Handler for more complex cases
3481      */
3482     GROW;
3483     end = ctxt->input->cur;
3484     c = CUR_CHAR(l);
3485     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3486 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3487 	return(NULL);
3488     }
3489 
3490     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3491 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3492 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3493             if ((len > XML_MAX_NAME_LENGTH) &&
3494                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3495                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3496                 return(NULL);
3497             }
3498 	    count = 0;
3499 	    GROW;
3500             if (ctxt->instate == XML_PARSER_EOF)
3501                 return(NULL);
3502 	}
3503 	len += l;
3504 	NEXTL(l);
3505 	end = ctxt->input->cur;
3506 	c = CUR_CHAR(l);
3507 	if (c == 0) {
3508 	    count = 0;
3509 	    /*
3510 	     * when shrinking to extend the buffer we really need to preserve
3511 	     * the part of the name we already parsed. Hence rolling back
3512 	     * by current lenght.
3513 	     */
3514 	    ctxt->input->cur -= l;
3515 	    GROW;
3516 	    ctxt->input->cur += l;
3517             if (ctxt->instate == XML_PARSER_EOF)
3518                 return(NULL);
3519 	    end = ctxt->input->cur;
3520 	    c = CUR_CHAR(l);
3521 	}
3522     }
3523     if ((len > XML_MAX_NAME_LENGTH) &&
3524         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3525         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3526         return(NULL);
3527     }
3528     return(xmlDictLookup(ctxt->dict, end - len, len));
3529 }
3530 
3531 /**
3532  * xmlParseNCName:
3533  * @ctxt:  an XML parser context
3534  * @len:  length of the string parsed
3535  *
3536  * parse an XML name.
3537  *
3538  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3539  *                      CombiningChar | Extender
3540  *
3541  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3542  *
3543  * Returns the Name parsed or NULL
3544  */
3545 
3546 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3547 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3548     const xmlChar *in, *e;
3549     const xmlChar *ret;
3550     int count = 0;
3551 
3552 #ifdef DEBUG
3553     nbParseNCName++;
3554 #endif
3555 
3556     /*
3557      * Accelerator for simple ASCII names
3558      */
3559     in = ctxt->input->cur;
3560     e = ctxt->input->end;
3561     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3562 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3563 	 (*in == '_')) && (in < e)) {
3564 	in++;
3565 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3566 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3567 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3568 	        (*in == '_') || (*in == '-') ||
3569 	        (*in == '.')) && (in < e))
3570 	    in++;
3571 	if (in >= e)
3572 	    goto complex;
3573 	if ((*in > 0) && (*in < 0x80)) {
3574 	    count = in - ctxt->input->cur;
3575             if ((count > XML_MAX_NAME_LENGTH) &&
3576                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578                 return(NULL);
3579             }
3580 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3581 	    ctxt->input->cur = in;
3582 	    ctxt->nbChars += count;
3583 	    ctxt->input->col += count;
3584 	    if (ret == NULL) {
3585 	        xmlErrMemory(ctxt, NULL);
3586 	    }
3587 	    return(ret);
3588 	}
3589     }
3590 complex:
3591     return(xmlParseNCNameComplex(ctxt));
3592 }
3593 
3594 /**
3595  * xmlParseNameAndCompare:
3596  * @ctxt:  an XML parser context
3597  *
3598  * parse an XML name and compares for match
3599  * (specialized for endtag parsing)
3600  *
3601  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3602  * and the name for mismatch
3603  */
3604 
3605 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3606 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3607     register const xmlChar *cmp = other;
3608     register const xmlChar *in;
3609     const xmlChar *ret;
3610 
3611     GROW;
3612     if (ctxt->instate == XML_PARSER_EOF)
3613         return(NULL);
3614 
3615     in = ctxt->input->cur;
3616     while (*in != 0 && *in == *cmp) {
3617 	++in;
3618 	++cmp;
3619 	ctxt->input->col++;
3620     }
3621     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3622 	/* success */
3623 	ctxt->input->cur = in;
3624 	return (const xmlChar*) 1;
3625     }
3626     /* failure (or end of input buffer), check with full function */
3627     ret = xmlParseName (ctxt);
3628     /* strings coming from the dictionnary direct compare possible */
3629     if (ret == other) {
3630 	return (const xmlChar*) 1;
3631     }
3632     return ret;
3633 }
3634 
3635 /**
3636  * xmlParseStringName:
3637  * @ctxt:  an XML parser context
3638  * @str:  a pointer to the string pointer (IN/OUT)
3639  *
3640  * parse an XML name.
3641  *
3642  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3643  *                  CombiningChar | Extender
3644  *
3645  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3646  *
3647  * [6] Names ::= Name (#x20 Name)*
3648  *
3649  * Returns the Name parsed or NULL. The @str pointer
3650  * is updated to the current location in the string.
3651  */
3652 
3653 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3654 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3655     xmlChar buf[XML_MAX_NAMELEN + 5];
3656     const xmlChar *cur = *str;
3657     int len = 0, l;
3658     int c;
3659 
3660 #ifdef DEBUG
3661     nbParseStringName++;
3662 #endif
3663 
3664     c = CUR_SCHAR(cur, l);
3665     if (!xmlIsNameStartChar(ctxt, c)) {
3666 	return(NULL);
3667     }
3668 
3669     COPY_BUF(l,buf,len,c);
3670     cur += l;
3671     c = CUR_SCHAR(cur, l);
3672     while (xmlIsNameChar(ctxt, c)) {
3673 	COPY_BUF(l,buf,len,c);
3674 	cur += l;
3675 	c = CUR_SCHAR(cur, l);
3676 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3677 	    /*
3678 	     * Okay someone managed to make a huge name, so he's ready to pay
3679 	     * for the processing speed.
3680 	     */
3681 	    xmlChar *buffer;
3682 	    int max = len * 2;
3683 
3684 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3685 	    if (buffer == NULL) {
3686 	        xmlErrMemory(ctxt, NULL);
3687 		return(NULL);
3688 	    }
3689 	    memcpy(buffer, buf, len);
3690 	    while (xmlIsNameChar(ctxt, c)) {
3691 		if (len + 10 > max) {
3692 		    xmlChar *tmp;
3693 
3694                     if ((len > XML_MAX_NAME_LENGTH) &&
3695                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3696                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3697 			xmlFree(buffer);
3698                         return(NULL);
3699                     }
3700 		    max *= 2;
3701 		    tmp = (xmlChar *) xmlRealloc(buffer,
3702 			                            max * sizeof(xmlChar));
3703 		    if (tmp == NULL) {
3704 			xmlErrMemory(ctxt, NULL);
3705 			xmlFree(buffer);
3706 			return(NULL);
3707 		    }
3708 		    buffer = tmp;
3709 		}
3710 		COPY_BUF(l,buffer,len,c);
3711 		cur += l;
3712 		c = CUR_SCHAR(cur, l);
3713 	    }
3714 	    buffer[len] = 0;
3715 	    *str = cur;
3716 	    return(buffer);
3717 	}
3718     }
3719     if ((len > XML_MAX_NAME_LENGTH) &&
3720         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3721         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3722         return(NULL);
3723     }
3724     *str = cur;
3725     return(xmlStrndup(buf, len));
3726 }
3727 
3728 /**
3729  * xmlParseNmtoken:
3730  * @ctxt:  an XML parser context
3731  *
3732  * parse an XML Nmtoken.
3733  *
3734  * [7] Nmtoken ::= (NameChar)+
3735  *
3736  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3737  *
3738  * Returns the Nmtoken parsed or NULL
3739  */
3740 
3741 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3742 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3743     xmlChar buf[XML_MAX_NAMELEN + 5];
3744     int len = 0, l;
3745     int c;
3746     int count = 0;
3747 
3748 #ifdef DEBUG
3749     nbParseNmToken++;
3750 #endif
3751 
3752     GROW;
3753     if (ctxt->instate == XML_PARSER_EOF)
3754         return(NULL);
3755     c = CUR_CHAR(l);
3756 
3757     while (xmlIsNameChar(ctxt, c)) {
3758 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3759 	    count = 0;
3760 	    GROW;
3761 	}
3762 	COPY_BUF(l,buf,len,c);
3763 	NEXTL(l);
3764 	c = CUR_CHAR(l);
3765 	if (c == 0) {
3766 	    count = 0;
3767 	    GROW;
3768 	    if (ctxt->instate == XML_PARSER_EOF)
3769 		return(NULL);
3770             c = CUR_CHAR(l);
3771 	}
3772 	if (len >= XML_MAX_NAMELEN) {
3773 	    /*
3774 	     * Okay someone managed to make a huge token, so he's ready to pay
3775 	     * for the processing speed.
3776 	     */
3777 	    xmlChar *buffer;
3778 	    int max = len * 2;
3779 
3780 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3781 	    if (buffer == NULL) {
3782 	        xmlErrMemory(ctxt, NULL);
3783 		return(NULL);
3784 	    }
3785 	    memcpy(buffer, buf, len);
3786 	    while (xmlIsNameChar(ctxt, c)) {
3787 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3788 		    count = 0;
3789 		    GROW;
3790                     if (ctxt->instate == XML_PARSER_EOF) {
3791                         xmlFree(buffer);
3792                         return(NULL);
3793                     }
3794 		}
3795 		if (len + 10 > max) {
3796 		    xmlChar *tmp;
3797 
3798                     if ((max > XML_MAX_NAME_LENGTH) &&
3799                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3801                         xmlFree(buffer);
3802                         return(NULL);
3803                     }
3804 		    max *= 2;
3805 		    tmp = (xmlChar *) xmlRealloc(buffer,
3806 			                            max * sizeof(xmlChar));
3807 		    if (tmp == NULL) {
3808 			xmlErrMemory(ctxt, NULL);
3809 			xmlFree(buffer);
3810 			return(NULL);
3811 		    }
3812 		    buffer = tmp;
3813 		}
3814 		COPY_BUF(l,buffer,len,c);
3815 		NEXTL(l);
3816 		c = CUR_CHAR(l);
3817 	    }
3818 	    buffer[len] = 0;
3819 	    return(buffer);
3820 	}
3821     }
3822     if (len == 0)
3823         return(NULL);
3824     if ((len > XML_MAX_NAME_LENGTH) &&
3825         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3826         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3827         return(NULL);
3828     }
3829     return(xmlStrndup(buf, len));
3830 }
3831 
3832 /**
3833  * xmlParseEntityValue:
3834  * @ctxt:  an XML parser context
3835  * @orig:  if non-NULL store a copy of the original entity value
3836  *
3837  * parse a value for ENTITY declarations
3838  *
3839  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3840  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3841  *
3842  * Returns the EntityValue parsed with reference substituted or NULL
3843  */
3844 
3845 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3846 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3847     xmlChar *buf = NULL;
3848     int len = 0;
3849     int size = XML_PARSER_BUFFER_SIZE;
3850     int c, l;
3851     xmlChar stop;
3852     xmlChar *ret = NULL;
3853     const xmlChar *cur = NULL;
3854     xmlParserInputPtr input;
3855 
3856     if (RAW == '"') stop = '"';
3857     else if (RAW == '\'') stop = '\'';
3858     else {
3859 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3860 	return(NULL);
3861     }
3862     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3863     if (buf == NULL) {
3864 	xmlErrMemory(ctxt, NULL);
3865 	return(NULL);
3866     }
3867 
3868     /*
3869      * The content of the entity definition is copied in a buffer.
3870      */
3871 
3872     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3873     input = ctxt->input;
3874     GROW;
3875     if (ctxt->instate == XML_PARSER_EOF) {
3876         xmlFree(buf);
3877         return(NULL);
3878     }
3879     NEXT;
3880     c = CUR_CHAR(l);
3881     /*
3882      * NOTE: 4.4.5 Included in Literal
3883      * When a parameter entity reference appears in a literal entity
3884      * value, ... a single or double quote character in the replacement
3885      * text is always treated as a normal data character and will not
3886      * terminate the literal.
3887      * In practice it means we stop the loop only when back at parsing
3888      * the initial entity and the quote is found
3889      */
3890     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3891 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3892 	if (len + 5 >= size) {
3893 	    xmlChar *tmp;
3894 
3895 	    size *= 2;
3896 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3897 	    if (tmp == NULL) {
3898 		xmlErrMemory(ctxt, NULL);
3899 		xmlFree(buf);
3900 		return(NULL);
3901 	    }
3902 	    buf = tmp;
3903 	}
3904 	COPY_BUF(l,buf,len,c);
3905 	NEXTL(l);
3906 	/*
3907 	 * Pop-up of finished entities.
3908 	 */
3909 	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3910 	    xmlPopInput(ctxt);
3911 
3912 	GROW;
3913 	c = CUR_CHAR(l);
3914 	if (c == 0) {
3915 	    GROW;
3916 	    c = CUR_CHAR(l);
3917 	}
3918     }
3919     buf[len] = 0;
3920     if (ctxt->instate == XML_PARSER_EOF) {
3921         xmlFree(buf);
3922         return(NULL);
3923     }
3924 
3925     /*
3926      * Raise problem w.r.t. '&' and '%' being used in non-entities
3927      * reference constructs. Note Charref will be handled in
3928      * xmlStringDecodeEntities()
3929      */
3930     cur = buf;
3931     while (*cur != 0) { /* non input consuming */
3932 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3933 	    xmlChar *name;
3934 	    xmlChar tmp = *cur;
3935 
3936 	    cur++;
3937 	    name = xmlParseStringName(ctxt, &cur);
3938             if ((name == NULL) || (*cur != ';')) {
3939 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3940 	    "EntityValue: '%c' forbidden except for entities references\n",
3941 	                          tmp);
3942 	    }
3943 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3944 		(ctxt->inputNr == 1)) {
3945 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3946 	    }
3947 	    if (name != NULL)
3948 		xmlFree(name);
3949 	    if (*cur == 0)
3950 	        break;
3951 	}
3952 	cur++;
3953     }
3954 
3955     /*
3956      * Then PEReference entities are substituted.
3957      */
3958     if (c != stop) {
3959 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3960 	xmlFree(buf);
3961     } else {
3962 	NEXT;
3963 	/*
3964 	 * NOTE: 4.4.7 Bypassed
3965 	 * When a general entity reference appears in the EntityValue in
3966 	 * an entity declaration, it is bypassed and left as is.
3967 	 * so XML_SUBSTITUTE_REF is not set here.
3968 	 */
3969 	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3970 				      0, 0, 0);
3971 	if (orig != NULL)
3972 	    *orig = buf;
3973 	else
3974 	    xmlFree(buf);
3975     }
3976 
3977     return(ret);
3978 }
3979 
3980 /**
3981  * xmlParseAttValueComplex:
3982  * @ctxt:  an XML parser context
3983  * @len:   the resulting attribute len
3984  * @normalize:  wether to apply the inner normalization
3985  *
3986  * parse a value for an attribute, this is the fallback function
3987  * of xmlParseAttValue() when the attribute parsing requires handling
3988  * of non-ASCII characters, or normalization compaction.
3989  *
3990  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3991  */
3992 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3993 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3994     xmlChar limit = 0;
3995     xmlChar *buf = NULL;
3996     xmlChar *rep = NULL;
3997     size_t len = 0;
3998     size_t buf_size = 0;
3999     int c, l, in_space = 0;
4000     xmlChar *current = NULL;
4001     xmlEntityPtr ent;
4002 
4003     if (NXT(0) == '"') {
4004 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4005 	limit = '"';
4006         NEXT;
4007     } else if (NXT(0) == '\'') {
4008 	limit = '\'';
4009 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4010         NEXT;
4011     } else {
4012 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4013 	return(NULL);
4014     }
4015 
4016     /*
4017      * allocate a translation buffer.
4018      */
4019     buf_size = XML_PARSER_BUFFER_SIZE;
4020     buf = (xmlChar *) xmlMallocAtomic(buf_size);
4021     if (buf == NULL) goto mem_error;
4022 
4023     /*
4024      * OK loop until we reach one of the ending char or a size limit.
4025      */
4026     c = CUR_CHAR(l);
4027     while (((NXT(0) != limit) && /* checked */
4028             (IS_CHAR(c)) && (c != '<')) &&
4029             (ctxt->instate != XML_PARSER_EOF)) {
4030         /*
4031          * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4032          * special option is given
4033          */
4034         if ((len > XML_MAX_TEXT_LENGTH) &&
4035             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4036             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4037                            "AttValue length too long\n");
4038             goto mem_error;
4039         }
4040 	if (c == 0) break;
4041 	if (c == '&') {
4042 	    in_space = 0;
4043 	    if (NXT(1) == '#') {
4044 		int val = xmlParseCharRef(ctxt);
4045 
4046 		if (val == '&') {
4047 		    if (ctxt->replaceEntities) {
4048 			if (len + 10 > buf_size) {
4049 			    growBuffer(buf, 10);
4050 			}
4051 			buf[len++] = '&';
4052 		    } else {
4053 			/*
4054 			 * The reparsing will be done in xmlStringGetNodeList()
4055 			 * called by the attribute() function in SAX.c
4056 			 */
4057 			if (len + 10 > buf_size) {
4058 			    growBuffer(buf, 10);
4059 			}
4060 			buf[len++] = '&';
4061 			buf[len++] = '#';
4062 			buf[len++] = '3';
4063 			buf[len++] = '8';
4064 			buf[len++] = ';';
4065 		    }
4066 		} else if (val != 0) {
4067 		    if (len + 10 > buf_size) {
4068 			growBuffer(buf, 10);
4069 		    }
4070 		    len += xmlCopyChar(0, &buf[len], val);
4071 		}
4072 	    } else {
4073 		ent = xmlParseEntityRef(ctxt);
4074 		ctxt->nbentities++;
4075 		if (ent != NULL)
4076 		    ctxt->nbentities += ent->owner;
4077 		if ((ent != NULL) &&
4078 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4079 		    if (len + 10 > buf_size) {
4080 			growBuffer(buf, 10);
4081 		    }
4082 		    if ((ctxt->replaceEntities == 0) &&
4083 		        (ent->content[0] == '&')) {
4084 			buf[len++] = '&';
4085 			buf[len++] = '#';
4086 			buf[len++] = '3';
4087 			buf[len++] = '8';
4088 			buf[len++] = ';';
4089 		    } else {
4090 			buf[len++] = ent->content[0];
4091 		    }
4092 		} else if ((ent != NULL) &&
4093 		           (ctxt->replaceEntities != 0)) {
4094 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4095 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4096 						      XML_SUBSTITUTE_REF,
4097 						      0, 0, 0);
4098 			if (rep != NULL) {
4099 			    current = rep;
4100 			    while (*current != 0) { /* non input consuming */
4101                                 if ((*current == 0xD) || (*current == 0xA) ||
4102                                     (*current == 0x9)) {
4103                                     buf[len++] = 0x20;
4104                                     current++;
4105                                 } else
4106                                     buf[len++] = *current++;
4107 				if (len + 10 > buf_size) {
4108 				    growBuffer(buf, 10);
4109 				}
4110 			    }
4111 			    xmlFree(rep);
4112 			    rep = NULL;
4113 			}
4114 		    } else {
4115 			if (len + 10 > buf_size) {
4116 			    growBuffer(buf, 10);
4117 			}
4118 			if (ent->content != NULL)
4119 			    buf[len++] = ent->content[0];
4120 		    }
4121 		} else if (ent != NULL) {
4122 		    int i = xmlStrlen(ent->name);
4123 		    const xmlChar *cur = ent->name;
4124 
4125 		    /*
4126 		     * This may look absurd but is needed to detect
4127 		     * entities problems
4128 		     */
4129 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4130 			(ent->content != NULL) && (ent->checked == 0)) {
4131 			unsigned long oldnbent = ctxt->nbentities;
4132 
4133 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4134 						  XML_SUBSTITUTE_REF, 0, 0, 0);
4135 
4136 			ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4137 			if (rep != NULL) {
4138 			    if (xmlStrchr(rep, '<'))
4139 			        ent->checked |= 1;
4140 			    xmlFree(rep);
4141 			    rep = NULL;
4142 			}
4143 		    }
4144 
4145 		    /*
4146 		     * Just output the reference
4147 		     */
4148 		    buf[len++] = '&';
4149 		    while (len + i + 10 > buf_size) {
4150 			growBuffer(buf, i + 10);
4151 		    }
4152 		    for (;i > 0;i--)
4153 			buf[len++] = *cur++;
4154 		    buf[len++] = ';';
4155 		}
4156 	    }
4157 	} else {
4158 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4159 	        if ((len != 0) || (!normalize)) {
4160 		    if ((!normalize) || (!in_space)) {
4161 			COPY_BUF(l,buf,len,0x20);
4162 			while (len + 10 > buf_size) {
4163 			    growBuffer(buf, 10);
4164 			}
4165 		    }
4166 		    in_space = 1;
4167 		}
4168 	    } else {
4169 	        in_space = 0;
4170 		COPY_BUF(l,buf,len,c);
4171 		if (len + 10 > buf_size) {
4172 		    growBuffer(buf, 10);
4173 		}
4174 	    }
4175 	    NEXTL(l);
4176 	}
4177 	GROW;
4178 	c = CUR_CHAR(l);
4179     }
4180     if (ctxt->instate == XML_PARSER_EOF)
4181         goto error;
4182 
4183     if ((in_space) && (normalize)) {
4184         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4185     }
4186     buf[len] = 0;
4187     if (RAW == '<') {
4188 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4189     } else if (RAW != limit) {
4190 	if ((c != 0) && (!IS_CHAR(c))) {
4191 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4192 			   "invalid character in attribute value\n");
4193 	} else {
4194 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4195 			   "AttValue: ' expected\n");
4196         }
4197     } else
4198 	NEXT;
4199 
4200     /*
4201      * There we potentially risk an overflow, don't allow attribute value of
4202      * length more than INT_MAX it is a very reasonnable assumption !
4203      */
4204     if (len >= INT_MAX) {
4205         xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4206                        "AttValue length too long\n");
4207         goto mem_error;
4208     }
4209 
4210     if (attlen != NULL) *attlen = (int) len;
4211     return(buf);
4212 
4213 mem_error:
4214     xmlErrMemory(ctxt, NULL);
4215 error:
4216     if (buf != NULL)
4217         xmlFree(buf);
4218     if (rep != NULL)
4219         xmlFree(rep);
4220     return(NULL);
4221 }
4222 
4223 /**
4224  * xmlParseAttValue:
4225  * @ctxt:  an XML parser context
4226  *
4227  * parse a value for an attribute
4228  * Note: the parser won't do substitution of entities here, this
4229  * will be handled later in xmlStringGetNodeList
4230  *
4231  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4232  *                   "'" ([^<&'] | Reference)* "'"
4233  *
4234  * 3.3.3 Attribute-Value Normalization:
4235  * Before the value of an attribute is passed to the application or
4236  * checked for validity, the XML processor must normalize it as follows:
4237  * - a character reference is processed by appending the referenced
4238  *   character to the attribute value
4239  * - an entity reference is processed by recursively processing the
4240  *   replacement text of the entity
4241  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4242  *   appending #x20 to the normalized value, except that only a single
4243  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4244  *   parsed entity or the literal entity value of an internal parsed entity
4245  * - other characters are processed by appending them to the normalized value
4246  * If the declared value is not CDATA, then the XML processor must further
4247  * process the normalized attribute value by discarding any leading and
4248  * trailing space (#x20) characters, and by replacing sequences of space
4249  * (#x20) characters by a single space (#x20) character.
4250  * All attributes for which no declaration has been read should be treated
4251  * by a non-validating parser as if declared CDATA.
4252  *
4253  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4254  */
4255 
4256 
4257 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4258 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4259     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4260     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4261 }
4262 
4263 /**
4264  * xmlParseSystemLiteral:
4265  * @ctxt:  an XML parser context
4266  *
4267  * parse an XML Literal
4268  *
4269  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4270  *
4271  * Returns the SystemLiteral parsed or NULL
4272  */
4273 
4274 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4275 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4276     xmlChar *buf = NULL;
4277     int len = 0;
4278     int size = XML_PARSER_BUFFER_SIZE;
4279     int cur, l;
4280     xmlChar stop;
4281     int state = ctxt->instate;
4282     int count = 0;
4283 
4284     SHRINK;
4285     if (RAW == '"') {
4286         NEXT;
4287 	stop = '"';
4288     } else if (RAW == '\'') {
4289         NEXT;
4290 	stop = '\'';
4291     } else {
4292 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4293 	return(NULL);
4294     }
4295 
4296     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4297     if (buf == NULL) {
4298         xmlErrMemory(ctxt, NULL);
4299 	return(NULL);
4300     }
4301     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4302     cur = CUR_CHAR(l);
4303     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4304 	if (len + 5 >= size) {
4305 	    xmlChar *tmp;
4306 
4307             if ((size > XML_MAX_NAME_LENGTH) &&
4308                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4309                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4310                 xmlFree(buf);
4311 		ctxt->instate = (xmlParserInputState) state;
4312                 return(NULL);
4313             }
4314 	    size *= 2;
4315 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4316 	    if (tmp == NULL) {
4317 	        xmlFree(buf);
4318 		xmlErrMemory(ctxt, NULL);
4319 		ctxt->instate = (xmlParserInputState) state;
4320 		return(NULL);
4321 	    }
4322 	    buf = tmp;
4323 	}
4324 	count++;
4325 	if (count > 50) {
4326 	    GROW;
4327 	    count = 0;
4328             if (ctxt->instate == XML_PARSER_EOF) {
4329 	        xmlFree(buf);
4330 		return(NULL);
4331             }
4332 	}
4333 	COPY_BUF(l,buf,len,cur);
4334 	NEXTL(l);
4335 	cur = CUR_CHAR(l);
4336 	if (cur == 0) {
4337 	    GROW;
4338 	    SHRINK;
4339 	    cur = CUR_CHAR(l);
4340 	}
4341     }
4342     buf[len] = 0;
4343     ctxt->instate = (xmlParserInputState) state;
4344     if (!IS_CHAR(cur)) {
4345 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4346     } else {
4347 	NEXT;
4348     }
4349     return(buf);
4350 }
4351 
4352 /**
4353  * xmlParsePubidLiteral:
4354  * @ctxt:  an XML parser context
4355  *
4356  * parse an XML public literal
4357  *
4358  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4359  *
4360  * Returns the PubidLiteral parsed or NULL.
4361  */
4362 
4363 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4364 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4365     xmlChar *buf = NULL;
4366     int len = 0;
4367     int size = XML_PARSER_BUFFER_SIZE;
4368     xmlChar cur;
4369     xmlChar stop;
4370     int count = 0;
4371     xmlParserInputState oldstate = ctxt->instate;
4372 
4373     SHRINK;
4374     if (RAW == '"') {
4375         NEXT;
4376 	stop = '"';
4377     } else if (RAW == '\'') {
4378         NEXT;
4379 	stop = '\'';
4380     } else {
4381 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4382 	return(NULL);
4383     }
4384     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4385     if (buf == NULL) {
4386 	xmlErrMemory(ctxt, NULL);
4387 	return(NULL);
4388     }
4389     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4390     cur = CUR;
4391     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4392 	if (len + 1 >= size) {
4393 	    xmlChar *tmp;
4394 
4395             if ((size > XML_MAX_NAME_LENGTH) &&
4396                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4397                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4398                 xmlFree(buf);
4399                 return(NULL);
4400             }
4401 	    size *= 2;
4402 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4403 	    if (tmp == NULL) {
4404 		xmlErrMemory(ctxt, NULL);
4405 		xmlFree(buf);
4406 		return(NULL);
4407 	    }
4408 	    buf = tmp;
4409 	}
4410 	buf[len++] = cur;
4411 	count++;
4412 	if (count > 50) {
4413 	    GROW;
4414 	    count = 0;
4415             if (ctxt->instate == XML_PARSER_EOF) {
4416 		xmlFree(buf);
4417 		return(NULL);
4418             }
4419 	}
4420 	NEXT;
4421 	cur = CUR;
4422 	if (cur == 0) {
4423 	    GROW;
4424 	    SHRINK;
4425 	    cur = CUR;
4426 	}
4427     }
4428     buf[len] = 0;
4429     if (cur != stop) {
4430 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4431     } else {
4432 	NEXT;
4433     }
4434     ctxt->instate = oldstate;
4435     return(buf);
4436 }
4437 
4438 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4439 
/*
 * Lookup table driving the inner loop of the character data scanner.
 * Indexed by byte value: an entry is non-zero (equal to its own index)
 * for 0x09 and for 0x20..0x7F except '&' (0x26), '<' (0x3C) and
 * ']' (0x5D); every other entry, including LF, CR and all bytes
 * >= 0x80, is 0.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F : only TAB is accepted, CR/LF are handled separately */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0x09, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F : '&' excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F : '<' excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F : ']' excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF : non-ascii */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
4477 
4478 /**
4479  * xmlParseCharData:
4480  * @ctxt:  an XML parser context
4481  * @cdata:  int indicating whether we are within a CDATA section
4482  *
4483  * parse a CharData section.
4484  * if we are within a CDATA section ']]>' marks an end of section.
4485  *
4486  * The right angle bracket (>) may be represented using the string "&gt;",
4487  * and must, for compatibility, be escaped using "&gt;" or a character
4488  * reference when it appears in the string "]]>" in content, when that
4489  * string is not marking the end of a CDATA section.
4490  *
4491  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4492  */
4493 
4494 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4495 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4496     const xmlChar *in;
4497     int nbchar = 0;
4498     int line = ctxt->input->line;
4499     int col = ctxt->input->col;
4500     int ccol;
4501 
4502     SHRINK;
4503     GROW;
4504     /*
4505      * Accelerated common case where input don't need to be
4506      * modified before passing it to the handler.
4507      */
4508     if (!cdata) {
4509 	in = ctxt->input->cur;
4510 	do {
4511 get_more_space:
4512 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4513 	    if (*in == 0xA) {
4514 		do {
4515 		    ctxt->input->line++; ctxt->input->col = 1;
4516 		    in++;
4517 		} while (*in == 0xA);
4518 		goto get_more_space;
4519 	    }
4520 	    if (*in == '<') {
4521 		nbchar = in - ctxt->input->cur;
4522 		if (nbchar > 0) {
4523 		    const xmlChar *tmp = ctxt->input->cur;
4524 		    ctxt->input->cur = in;
4525 
4526 		    if ((ctxt->sax != NULL) &&
4527 		        (ctxt->sax->ignorableWhitespace !=
4528 		         ctxt->sax->characters)) {
4529 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4530 			    if (ctxt->sax->ignorableWhitespace != NULL)
4531 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4532 						       tmp, nbchar);
4533 			} else {
4534 			    if (ctxt->sax->characters != NULL)
4535 				ctxt->sax->characters(ctxt->userData,
4536 						      tmp, nbchar);
4537 			    if (*ctxt->space == -1)
4538 			        *ctxt->space = -2;
4539 			}
4540 		    } else if ((ctxt->sax != NULL) &&
4541 		               (ctxt->sax->characters != NULL)) {
4542 			ctxt->sax->characters(ctxt->userData,
4543 					      tmp, nbchar);
4544 		    }
4545 		}
4546 		return;
4547 	    }
4548 
4549 get_more:
4550             ccol = ctxt->input->col;
4551 	    while (test_char_data[*in]) {
4552 		in++;
4553 		ccol++;
4554 	    }
4555 	    ctxt->input->col = ccol;
4556 	    if (*in == 0xA) {
4557 		do {
4558 		    ctxt->input->line++; ctxt->input->col = 1;
4559 		    in++;
4560 		} while (*in == 0xA);
4561 		goto get_more;
4562 	    }
4563 	    if (*in == ']') {
4564 		if ((in[1] == ']') && (in[2] == '>')) {
4565 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4566 		    ctxt->input->cur = in;
4567 		    return;
4568 		}
4569 		in++;
4570 		ctxt->input->col++;
4571 		goto get_more;
4572 	    }
4573 	    nbchar = in - ctxt->input->cur;
4574 	    if (nbchar > 0) {
4575 		if ((ctxt->sax != NULL) &&
4576 		    (ctxt->sax->ignorableWhitespace !=
4577 		     ctxt->sax->characters) &&
4578 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4579 		    const xmlChar *tmp = ctxt->input->cur;
4580 		    ctxt->input->cur = in;
4581 
4582 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4583 		        if (ctxt->sax->ignorableWhitespace != NULL)
4584 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4585 							   tmp, nbchar);
4586 		    } else {
4587 		        if (ctxt->sax->characters != NULL)
4588 			    ctxt->sax->characters(ctxt->userData,
4589 						  tmp, nbchar);
4590 			if (*ctxt->space == -1)
4591 			    *ctxt->space = -2;
4592 		    }
4593                     line = ctxt->input->line;
4594                     col = ctxt->input->col;
4595 		} else if (ctxt->sax != NULL) {
4596 		    if (ctxt->sax->characters != NULL)
4597 			ctxt->sax->characters(ctxt->userData,
4598 					      ctxt->input->cur, nbchar);
4599                     line = ctxt->input->line;
4600                     col = ctxt->input->col;
4601 		}
4602                 /* something really bad happened in the SAX callback */
4603                 if (ctxt->instate != XML_PARSER_CONTENT)
4604                     return;
4605 	    }
4606 	    ctxt->input->cur = in;
4607 	    if (*in == 0xD) {
4608 		in++;
4609 		if (*in == 0xA) {
4610 		    ctxt->input->cur = in;
4611 		    in++;
4612 		    ctxt->input->line++; ctxt->input->col = 1;
4613 		    continue; /* while */
4614 		}
4615 		in--;
4616 	    }
4617 	    if (*in == '<') {
4618 		return;
4619 	    }
4620 	    if (*in == '&') {
4621 		return;
4622 	    }
4623 	    SHRINK;
4624 	    GROW;
4625             if (ctxt->instate == XML_PARSER_EOF)
4626 		return;
4627 	    in = ctxt->input->cur;
4628 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4629 	nbchar = 0;
4630     }
4631     ctxt->input->line = line;
4632     ctxt->input->col = col;
4633     xmlParseCharDataComplex(ctxt, cdata);
4634 }
4635 
4636 /**
4637  * xmlParseCharDataComplex:
4638  * @ctxt:  an XML parser context
4639  * @cdata:  int indicating whether we are within a CDATA section
4640  *
4641  * parse a CharData section.this is the fallback function
4642  * of xmlParseCharData() when the parsing requires handling
4643  * of non-ASCII characters.
4644  */
4645 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4646 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4647     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4648     int nbchar = 0;
4649     int cur, l;
4650     int count = 0;
4651 
4652     SHRINK;
4653     GROW;
4654     cur = CUR_CHAR(l);
4655     while ((cur != '<') && /* checked */
4656            (cur != '&') &&
4657 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4658 	if ((cur == ']') && (NXT(1) == ']') &&
4659 	    (NXT(2) == '>')) {
4660 	    if (cdata) break;
4661 	    else {
4662 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4663 	    }
4664 	}
4665 	COPY_BUF(l,buf,nbchar,cur);
4666 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4667 	    buf[nbchar] = 0;
4668 
4669 	    /*
4670 	     * OK the segment is to be consumed as chars.
4671 	     */
4672 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4673 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4674 		    if (ctxt->sax->ignorableWhitespace != NULL)
4675 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4676 			                               buf, nbchar);
4677 		} else {
4678 		    if (ctxt->sax->characters != NULL)
4679 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4680 		    if ((ctxt->sax->characters !=
4681 		         ctxt->sax->ignorableWhitespace) &&
4682 			(*ctxt->space == -1))
4683 			*ctxt->space = -2;
4684 		}
4685 	    }
4686 	    nbchar = 0;
4687             /* something really bad happened in the SAX callback */
4688             if (ctxt->instate != XML_PARSER_CONTENT)
4689                 return;
4690 	}
4691 	count++;
4692 	if (count > 50) {
4693 	    GROW;
4694 	    count = 0;
4695             if (ctxt->instate == XML_PARSER_EOF)
4696 		return;
4697 	}
4698 	NEXTL(l);
4699 	cur = CUR_CHAR(l);
4700     }
4701     if (nbchar != 0) {
4702         buf[nbchar] = 0;
4703 	/*
4704 	 * OK the segment is to be consumed as chars.
4705 	 */
4706 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4707 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4708 		if (ctxt->sax->ignorableWhitespace != NULL)
4709 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4710 	    } else {
4711 		if (ctxt->sax->characters != NULL)
4712 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4713 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4714 		    (*ctxt->space == -1))
4715 		    *ctxt->space = -2;
4716 	    }
4717 	}
4718     }
4719     if ((cur != 0) && (!IS_CHAR(cur))) {
4720 	/* Generate the error and skip the offending character */
4721         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4722                           "PCDATA invalid Char value %d\n",
4723 	                  cur);
4724 	NEXTL(l);
4725     }
4726 }
4727 
4728 /**
4729  * xmlParseExternalID:
4730  * @ctxt:  an XML parser context
4731  * @publicID:  a xmlChar** receiving PubidLiteral
4732  * @strict: indicate whether we should restrict parsing to only
4733  *          production [75], see NOTE below
4734  *
4735  * Parse an External ID or a Public ID
4736  *
4737  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4738  *       'PUBLIC' S PubidLiteral S SystemLiteral
4739  *
4740  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4742  *
4743  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4744  *
4745  * Returns the function returns SystemLiteral and in the second
4746  *                case publicID receives PubidLiteral, is strict is off
4747  *                it is possible to return NULL and have publicID set.
4748  */
4749 
4750 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4751 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4752     xmlChar *URI = NULL;
4753 
4754     SHRINK;
4755 
4756     *publicID = NULL;
4757     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4758         SKIP(6);
4759 	if (!IS_BLANK_CH(CUR)) {
4760 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4761 	                   "Space required after 'SYSTEM'\n");
4762 	}
4763         SKIP_BLANKS;
4764 	URI = xmlParseSystemLiteral(ctxt);
4765 	if (URI == NULL) {
4766 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4767         }
4768     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4769         SKIP(6);
4770 	if (!IS_BLANK_CH(CUR)) {
4771 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4772 		    "Space required after 'PUBLIC'\n");
4773 	}
4774         SKIP_BLANKS;
4775 	*publicID = xmlParsePubidLiteral(ctxt);
4776 	if (*publicID == NULL) {
4777 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4778 	}
4779 	if (strict) {
4780 	    /*
4781 	     * We don't handle [83] so "S SystemLiteral" is required.
4782 	     */
4783 	    if (!IS_BLANK_CH(CUR)) {
4784 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785 			"Space required after the Public Identifier\n");
4786 	    }
4787 	} else {
4788 	    /*
4789 	     * We handle [83] so we return immediately, if
4790 	     * "S SystemLiteral" is not detected. From a purely parsing
4791 	     * point of view that's a nice mess.
4792 	     */
4793 	    const xmlChar *ptr;
4794 	    GROW;
4795 
4796 	    ptr = CUR_PTR;
4797 	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4798 
4799 	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4800 	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4801 	}
4802         SKIP_BLANKS;
4803 	URI = xmlParseSystemLiteral(ctxt);
4804 	if (URI == NULL) {
4805 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4806         }
4807     }
4808     return(URI);
4809 }
4810 
4811 /**
4812  * xmlParseCommentComplex:
4813  * @ctxt:  an XML parser context
4814  * @buf:  the already parsed part of the buffer
4815  * @len:  number of bytes filles in the buffer
4816  * @size:  allocated size of the buffer
4817  *
4818  * Skip an XML (SGML) comment <!-- .... -->
4819  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4820  *  must not occur within comments. "
4821  * This is the slow routine in case the accelerator for ascii didn't work
4822  *
4823  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4824  */
4825 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4826 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4827                        size_t len, size_t size) {
4828     int q, ql;
4829     int r, rl;
4830     int cur, l;
4831     size_t count = 0;
4832     int inputid;
4833 
4834     inputid = ctxt->input->id;
4835 
4836     if (buf == NULL) {
4837         len = 0;
4838 	size = XML_PARSER_BUFFER_SIZE;
4839 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4840 	if (buf == NULL) {
4841 	    xmlErrMemory(ctxt, NULL);
4842 	    return;
4843 	}
4844     }
4845     GROW;	/* Assure there's enough input data */
4846     q = CUR_CHAR(ql);
4847     if (q == 0)
4848         goto not_terminated;
4849     if (!IS_CHAR(q)) {
4850         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4851                           "xmlParseComment: invalid xmlChar value %d\n",
4852 	                  q);
4853 	xmlFree (buf);
4854 	return;
4855     }
4856     NEXTL(ql);
4857     r = CUR_CHAR(rl);
4858     if (r == 0)
4859         goto not_terminated;
4860     if (!IS_CHAR(r)) {
4861         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4862                           "xmlParseComment: invalid xmlChar value %d\n",
4863 	                  q);
4864 	xmlFree (buf);
4865 	return;
4866     }
4867     NEXTL(rl);
4868     cur = CUR_CHAR(l);
4869     if (cur == 0)
4870         goto not_terminated;
4871     while (IS_CHAR(cur) && /* checked */
4872            ((cur != '>') ||
4873 	    (r != '-') || (q != '-'))) {
4874 	if ((r == '-') && (q == '-')) {
4875 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4876 	}
4877         if ((len > XML_MAX_TEXT_LENGTH) &&
4878             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4879             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4880                          "Comment too big found", NULL);
4881             xmlFree (buf);
4882             return;
4883         }
4884 	if (len + 5 >= size) {
4885 	    xmlChar *new_buf;
4886             size_t new_size;
4887 
4888 	    new_size = size * 2;
4889 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4890 	    if (new_buf == NULL) {
4891 		xmlFree (buf);
4892 		xmlErrMemory(ctxt, NULL);
4893 		return;
4894 	    }
4895 	    buf = new_buf;
4896             size = new_size;
4897 	}
4898 	COPY_BUF(ql,buf,len,q);
4899 	q = r;
4900 	ql = rl;
4901 	r = cur;
4902 	rl = l;
4903 
4904 	count++;
4905 	if (count > 50) {
4906 	    GROW;
4907 	    count = 0;
4908             if (ctxt->instate == XML_PARSER_EOF) {
4909 		xmlFree(buf);
4910 		return;
4911             }
4912 	}
4913 	NEXTL(l);
4914 	cur = CUR_CHAR(l);
4915 	if (cur == 0) {
4916 	    SHRINK;
4917 	    GROW;
4918 	    cur = CUR_CHAR(l);
4919 	}
4920     }
4921     buf[len] = 0;
4922     if (cur == 0) {
4923 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4924 	                     "Comment not terminated \n<!--%.50s\n", buf);
4925     } else if (!IS_CHAR(cur)) {
4926         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4927                           "xmlParseComment: invalid xmlChar value %d\n",
4928 	                  cur);
4929     } else {
4930 	if (inputid != ctxt->input->id) {
4931 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4932 		"Comment doesn't start and stop in the same entity\n");
4933 	}
4934         NEXT;
4935 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4936 	    (!ctxt->disableSAX))
4937 	    ctxt->sax->comment(ctxt->userData, buf);
4938     }
4939     xmlFree(buf);
4940     return;
4941 not_terminated:
4942     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4943 			 "Comment not terminated\n", NULL);
4944     xmlFree(buf);
4945     return;
4946 }
4947 
4948 /**
4949  * xmlParseComment:
4950  * @ctxt:  an XML parser context
4951  *
4952  * Skip an XML (SGML) comment <!-- .... -->
4953  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4954  *  must not occur within comments. "
4955  *
4956  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4957  */
4958 void
xmlParseComment(xmlParserCtxtPtr ctxt)4959 xmlParseComment(xmlParserCtxtPtr ctxt) {
4960     xmlChar *buf = NULL;
4961     size_t size = XML_PARSER_BUFFER_SIZE;
4962     size_t len = 0;
4963     xmlParserInputState state;
4964     const xmlChar *in;
4965     size_t nbchar = 0;
4966     int ccol;
4967     int inputid;
4968 
4969     /*
4970      * Check that there is a comment right here.
4971      */
4972     if ((RAW != '<') || (NXT(1) != '!') ||
4973         (NXT(2) != '-') || (NXT(3) != '-')) return;
4974     state = ctxt->instate;
4975     ctxt->instate = XML_PARSER_COMMENT;
4976     inputid = ctxt->input->id;
4977     SKIP(4);
4978     SHRINK;
4979     GROW;
4980 
4981     /*
4982      * Accelerated common case where input don't need to be
4983      * modified before passing it to the handler.
4984      */
4985     in = ctxt->input->cur;
4986     do {
4987 	if (*in == 0xA) {
4988 	    do {
4989 		ctxt->input->line++; ctxt->input->col = 1;
4990 		in++;
4991 	    } while (*in == 0xA);
4992 	}
4993 get_more:
4994         ccol = ctxt->input->col;
4995 	while (((*in > '-') && (*in <= 0x7F)) ||
4996 	       ((*in >= 0x20) && (*in < '-')) ||
4997 	       (*in == 0x09)) {
4998 		    in++;
4999 		    ccol++;
5000 	}
5001 	ctxt->input->col = ccol;
5002 	if (*in == 0xA) {
5003 	    do {
5004 		ctxt->input->line++; ctxt->input->col = 1;
5005 		in++;
5006 	    } while (*in == 0xA);
5007 	    goto get_more;
5008 	}
5009 	nbchar = in - ctxt->input->cur;
5010 	/*
5011 	 * save current set of data
5012 	 */
5013 	if (nbchar > 0) {
5014 	    if ((ctxt->sax != NULL) &&
5015 		(ctxt->sax->comment != NULL)) {
5016 		if (buf == NULL) {
5017 		    if ((*in == '-') && (in[1] == '-'))
5018 		        size = nbchar + 1;
5019 		    else
5020 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
5021 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5022 		    if (buf == NULL) {
5023 		        xmlErrMemory(ctxt, NULL);
5024 			ctxt->instate = state;
5025 			return;
5026 		    }
5027 		    len = 0;
5028 		} else if (len + nbchar + 1 >= size) {
5029 		    xmlChar *new_buf;
5030 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5031 		    new_buf = (xmlChar *) xmlRealloc(buf,
5032 		                                     size * sizeof(xmlChar));
5033 		    if (new_buf == NULL) {
5034 		        xmlFree (buf);
5035 			xmlErrMemory(ctxt, NULL);
5036 			ctxt->instate = state;
5037 			return;
5038 		    }
5039 		    buf = new_buf;
5040 		}
5041 		memcpy(&buf[len], ctxt->input->cur, nbchar);
5042 		len += nbchar;
5043 		buf[len] = 0;
5044 	    }
5045 	}
5046         if ((len > XML_MAX_TEXT_LENGTH) &&
5047             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5048             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5049                          "Comment too big found", NULL);
5050             xmlFree (buf);
5051             return;
5052         }
5053 	ctxt->input->cur = in;
5054 	if (*in == 0xA) {
5055 	    in++;
5056 	    ctxt->input->line++; ctxt->input->col = 1;
5057 	}
5058 	if (*in == 0xD) {
5059 	    in++;
5060 	    if (*in == 0xA) {
5061 		ctxt->input->cur = in;
5062 		in++;
5063 		ctxt->input->line++; ctxt->input->col = 1;
5064 		continue; /* while */
5065 	    }
5066 	    in--;
5067 	}
5068 	SHRINK;
5069 	GROW;
5070         if (ctxt->instate == XML_PARSER_EOF) {
5071             xmlFree(buf);
5072             return;
5073         }
5074 	in = ctxt->input->cur;
5075 	if (*in == '-') {
5076 	    if (in[1] == '-') {
5077 	        if (in[2] == '>') {
5078 		    if (ctxt->input->id != inputid) {
5079 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5080 			"comment doesn't start and stop in the same entity\n");
5081 		    }
5082 		    SKIP(3);
5083 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5084 		        (!ctxt->disableSAX)) {
5085 			if (buf != NULL)
5086 			    ctxt->sax->comment(ctxt->userData, buf);
5087 			else
5088 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5089 		    }
5090 		    if (buf != NULL)
5091 		        xmlFree(buf);
5092 		    if (ctxt->instate != XML_PARSER_EOF)
5093 			ctxt->instate = state;
5094 		    return;
5095 		}
5096 		if (buf != NULL) {
5097 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5098 		                      "Double hyphen within comment: "
5099                                       "<!--%.50s\n",
5100 				      buf);
5101 		} else
5102 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5103 		                      "Double hyphen within comment\n", NULL);
5104 		in++;
5105 		ctxt->input->col++;
5106 	    }
5107 	    in++;
5108 	    ctxt->input->col++;
5109 	    goto get_more;
5110 	}
5111     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5112     xmlParseCommentComplex(ctxt, buf, len, size);
5113     ctxt->instate = state;
5114     return;
5115 }
5116 
5117 
5118 /**
5119  * xmlParsePITarget:
5120  * @ctxt:  an XML parser context
5121  *
5122  * parse the name of a PI
5123  *
5124  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5125  *
5126  * Returns the PITarget name or NULL
5127  */
5128 
5129 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5130 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5131     const xmlChar *name;
5132 
5133     name = xmlParseName(ctxt);
5134     if ((name != NULL) &&
5135         ((name[0] == 'x') || (name[0] == 'X')) &&
5136         ((name[1] == 'm') || (name[1] == 'M')) &&
5137         ((name[2] == 'l') || (name[2] == 'L'))) {
5138 	int i;
5139 	if ((name[0] == 'x') && (name[1] == 'm') &&
5140 	    (name[2] == 'l') && (name[3] == 0)) {
5141 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5142 		 "XML declaration allowed only at the start of the document\n");
5143 	    return(name);
5144 	} else if (name[3] == 0) {
5145 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5146 	    return(name);
5147 	}
5148 	for (i = 0;;i++) {
5149 	    if (xmlW3CPIs[i] == NULL) break;
5150 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5151 	        return(name);
5152 	}
5153 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5154 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5155 		      NULL, NULL);
5156     }
5157     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5158 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5159 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5160     }
5161     return(name);
5162 }
5163 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is added to the catalogs of the parsing context in
 * order to be used if there is a resolution need further down in the
 * document. A malformed value raises an XML_WAR_CATALOG_PI warning,
 * except when the '=' is missing after "catalog": in that case the PI
 * is ignored without any message.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *pos = catalog;
    const xmlChar *start;
    xmlChar *href = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (xmlStrncmp(pos, BAD_CAST"catalog", 7) != 0)
        goto error;
    pos += 7;

    /* the equal sign, anything else ends the processing silently */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (*pos != '=')
        return;
    pos++;

    /* the opening quote, single or double */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    quote = *pos;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    pos++;

    /* the value runs up to the matching quote */
    for (start = pos; (*pos != 0) && (*pos != quote); pos++)
        ;
    if (*pos == 0)
        goto error;
    href = xmlStrndup(start, pos - start);
    pos++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (*pos != 0)
        goto error;

    if (href != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
        xmlFree(href);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (href != NULL)
        xmlFree(href);
}
#endif
5225 
5226 /**
5227  * xmlParsePI:
5228  * @ctxt:  an XML parser context
5229  *
5230  * parse an XML Processing Instruction.
5231  *
5232  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5233  *
5234  * The processing is transfered to SAX once parsed.
5235  */
5236 
5237 void
xmlParsePI(xmlParserCtxtPtr ctxt)5238 xmlParsePI(xmlParserCtxtPtr ctxt) {
5239     xmlChar *buf = NULL;
5240     size_t len = 0;
5241     size_t size = XML_PARSER_BUFFER_SIZE;
5242     int cur, l;
5243     const xmlChar *target;
5244     xmlParserInputState state;
5245     int count = 0;
5246 
5247     if ((RAW == '<') && (NXT(1) == '?')) {
5248 	xmlParserInputPtr input = ctxt->input;
5249 	state = ctxt->instate;
5250         ctxt->instate = XML_PARSER_PI;
5251 	/*
5252 	 * this is a Processing Instruction.
5253 	 */
5254 	SKIP(2);
5255 	SHRINK;
5256 
5257 	/*
5258 	 * Parse the target name and check for special support like
5259 	 * namespace.
5260 	 */
5261         target = xmlParsePITarget(ctxt);
5262 	if (target != NULL) {
5263 	    if ((RAW == '?') && (NXT(1) == '>')) {
5264 		if (input != ctxt->input) {
5265 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5266 	    "PI declaration doesn't start and stop in the same entity\n");
5267 		}
5268 		SKIP(2);
5269 
5270 		/*
5271 		 * SAX: PI detected.
5272 		 */
5273 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5274 		    (ctxt->sax->processingInstruction != NULL))
5275 		    ctxt->sax->processingInstruction(ctxt->userData,
5276 		                                     target, NULL);
5277 		if (ctxt->instate != XML_PARSER_EOF)
5278 		    ctxt->instate = state;
5279 		return;
5280 	    }
5281 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5282 	    if (buf == NULL) {
5283 		xmlErrMemory(ctxt, NULL);
5284 		ctxt->instate = state;
5285 		return;
5286 	    }
5287 	    cur = CUR;
5288 	    if (!IS_BLANK(cur)) {
5289 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5290 			  "ParsePI: PI %s space expected\n", target);
5291 	    }
5292             SKIP_BLANKS;
5293 	    cur = CUR_CHAR(l);
5294 	    while (IS_CHAR(cur) && /* checked */
5295 		   ((cur != '?') || (NXT(1) != '>'))) {
5296 		if (len + 5 >= size) {
5297 		    xmlChar *tmp;
5298                     size_t new_size = size * 2;
5299 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5300 		    if (tmp == NULL) {
5301 			xmlErrMemory(ctxt, NULL);
5302 			xmlFree(buf);
5303 			ctxt->instate = state;
5304 			return;
5305 		    }
5306 		    buf = tmp;
5307                     size = new_size;
5308 		}
5309 		count++;
5310 		if (count > 50) {
5311 		    GROW;
5312                     if (ctxt->instate == XML_PARSER_EOF) {
5313                         xmlFree(buf);
5314                         return;
5315                     }
5316 		    count = 0;
5317                     if ((len > XML_MAX_TEXT_LENGTH) &&
5318                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5319                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320                                           "PI %s too big found", target);
5321                         xmlFree(buf);
5322                         ctxt->instate = state;
5323                         return;
5324                     }
5325 		}
5326 		COPY_BUF(l,buf,len,cur);
5327 		NEXTL(l);
5328 		cur = CUR_CHAR(l);
5329 		if (cur == 0) {
5330 		    SHRINK;
5331 		    GROW;
5332 		    cur = CUR_CHAR(l);
5333 		}
5334 	    }
5335             if ((len > XML_MAX_TEXT_LENGTH) &&
5336                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5337                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5338                                   "PI %s too big found", target);
5339                 xmlFree(buf);
5340                 ctxt->instate = state;
5341                 return;
5342             }
5343 	    buf[len] = 0;
5344 	    if (cur != '?') {
5345 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5346 		      "ParsePI: PI %s never end ...\n", target);
5347 	    } else {
5348 		if (input != ctxt->input) {
5349 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5350 	    "PI declaration doesn't start and stop in the same entity\n");
5351 		}
5352 		SKIP(2);
5353 
5354 #ifdef LIBXML_CATALOG_ENABLED
5355 		if (((state == XML_PARSER_MISC) ||
5356 	             (state == XML_PARSER_START)) &&
5357 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5358 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5359 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5360 			(allow == XML_CATA_ALLOW_ALL))
5361 			xmlParseCatalogPI(ctxt, buf);
5362 		}
5363 #endif
5364 
5365 
5366 		/*
5367 		 * SAX: PI detected.
5368 		 */
5369 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5370 		    (ctxt->sax->processingInstruction != NULL))
5371 		    ctxt->sax->processingInstruction(ctxt->userData,
5372 		                                     target, buf);
5373 	    }
5374 	    xmlFree(buf);
5375 	} else {
5376 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5377 	}
5378 	if (ctxt->instate != XML_PARSER_EOF)
5379 	    ctxt->instate = state;
5380     }
5381 }
5382 
5383 /**
5384  * xmlParseNotationDecl:
5385  * @ctxt:  an XML parser context
5386  *
5387  * parse a notation declaration
5388  *
5389  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5390  *
5391  * Hence there is actually 3 choices:
5392  *     'PUBLIC' S PubidLiteral
5393  *     'PUBLIC' S PubidLiteral S SystemLiteral
5394  * and 'SYSTEM' S SystemLiteral
5395  *
5396  * See the NOTE on xmlParseExternalID().
5397  */
5398 
5399 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5400 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5401     const xmlChar *name;
5402     xmlChar *Pubid;
5403     xmlChar *Systemid;
5404 
5405     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5406 	xmlParserInputPtr input = ctxt->input;
5407 	SHRINK;
5408 	SKIP(10);
5409 	if (!IS_BLANK_CH(CUR)) {
5410 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5411 			   "Space required after '<!NOTATION'\n");
5412 	    return;
5413 	}
5414 	SKIP_BLANKS;
5415 
5416         name = xmlParseName(ctxt);
5417 	if (name == NULL) {
5418 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5419 	    return;
5420 	}
5421 	if (!IS_BLANK_CH(CUR)) {
5422 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423 		     "Space required after the NOTATION name'\n");
5424 	    return;
5425 	}
5426 	if (xmlStrchr(name, ':') != NULL) {
5427 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5428 		     "colons are forbidden from notation names '%s'\n",
5429 		     name, NULL, NULL);
5430 	}
5431 	SKIP_BLANKS;
5432 
5433 	/*
5434 	 * Parse the IDs.
5435 	 */
5436 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5437 	SKIP_BLANKS;
5438 
5439 	if (RAW == '>') {
5440 	    if (input != ctxt->input) {
5441 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442 	"Notation declaration doesn't start and stop in the same entity\n");
5443 	    }
5444 	    NEXT;
5445 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5446 		(ctxt->sax->notationDecl != NULL))
5447 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5448 	} else {
5449 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5450 	}
5451 	if (Systemid != NULL) xmlFree(Systemid);
5452 	if (Pubid != NULL) xmlFree(Pubid);
5453     }
5454 }
5455 
5456 /**
5457  * xmlParseEntityDecl:
5458  * @ctxt:  an XML parser context
5459  *
5460  * parse <!ENTITY declarations
5461  *
5462  * [70] EntityDecl ::= GEDecl | PEDecl
5463  *
5464  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5465  *
5466  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5467  *
5468  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5469  *
5470  * [74] PEDef ::= EntityValue | ExternalID
5471  *
5472  * [76] NDataDecl ::= S 'NDATA' S Name
5473  *
5474  * [ VC: Notation Declared ]
5475  * The Name must match the declared name of a notation.
5476  */
5477 
5478 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5479 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5480     const xmlChar *name = NULL;
5481     xmlChar *value = NULL;
5482     xmlChar *URI = NULL, *literal = NULL;
5483     const xmlChar *ndata = NULL;
5484     int isParameter = 0;
5485     xmlChar *orig = NULL;
5486     int skipped;
5487 
5488     /* GROW; done in the caller */
5489     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490 	xmlParserInputPtr input = ctxt->input;
5491 	SHRINK;
5492 	SKIP(8);
5493 	skipped = SKIP_BLANKS;
5494 	if (skipped == 0) {
5495 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5496 			   "Space required after '<!ENTITY'\n");
5497 	}
5498 
5499 	if (RAW == '%') {
5500 	    NEXT;
5501 	    skipped = SKIP_BLANKS;
5502 	    if (skipped == 0) {
5503 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5504 			       "Space required after '%'\n");
5505 	    }
5506 	    isParameter = 1;
5507 	}
5508 
5509         name = xmlParseName(ctxt);
5510 	if (name == NULL) {
5511 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5512 	                   "xmlParseEntityDecl: no name\n");
5513             return;
5514 	}
5515 	if (xmlStrchr(name, ':') != NULL) {
5516 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5517 		     "colons are forbidden from entities names '%s'\n",
5518 		     name, NULL, NULL);
5519 	}
5520         skipped = SKIP_BLANKS;
5521 	if (skipped == 0) {
5522 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5523 			   "Space required after the entity name\n");
5524 	}
5525 
5526 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5527 	/*
5528 	 * handle the various case of definitions...
5529 	 */
5530 	if (isParameter) {
5531 	    if ((RAW == '"') || (RAW == '\'')) {
5532 	        value = xmlParseEntityValue(ctxt, &orig);
5533 		if (value) {
5534 		    if ((ctxt->sax != NULL) &&
5535 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536 			ctxt->sax->entityDecl(ctxt->userData, name,
5537 		                    XML_INTERNAL_PARAMETER_ENTITY,
5538 				    NULL, NULL, value);
5539 		}
5540 	    } else {
5541 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5542 		if ((URI == NULL) && (literal == NULL)) {
5543 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5544 		}
5545 		if (URI) {
5546 		    xmlURIPtr uri;
5547 
5548 		    uri = xmlParseURI((const char *) URI);
5549 		    if (uri == NULL) {
5550 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5551 				     "Invalid URI: %s\n", URI);
5552 			/*
5553 			 * This really ought to be a well formedness error
5554 			 * but the XML Core WG decided otherwise c.f. issue
5555 			 * E26 of the XML erratas.
5556 			 */
5557 		    } else {
5558 			if (uri->fragment != NULL) {
5559 			    /*
5560 			     * Okay this is foolish to block those but not
5561 			     * invalid URIs.
5562 			     */
5563 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5564 			} else {
5565 			    if ((ctxt->sax != NULL) &&
5566 				(!ctxt->disableSAX) &&
5567 				(ctxt->sax->entityDecl != NULL))
5568 				ctxt->sax->entityDecl(ctxt->userData, name,
5569 					    XML_EXTERNAL_PARAMETER_ENTITY,
5570 					    literal, URI, NULL);
5571 			}
5572 			xmlFreeURI(uri);
5573 		    }
5574 		}
5575 	    }
5576 	} else {
5577 	    if ((RAW == '"') || (RAW == '\'')) {
5578 	        value = xmlParseEntityValue(ctxt, &orig);
5579 		if ((ctxt->sax != NULL) &&
5580 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5581 		    ctxt->sax->entityDecl(ctxt->userData, name,
5582 				XML_INTERNAL_GENERAL_ENTITY,
5583 				NULL, NULL, value);
5584 		/*
5585 		 * For expat compatibility in SAX mode.
5586 		 */
5587 		if ((ctxt->myDoc == NULL) ||
5588 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5589 		    if (ctxt->myDoc == NULL) {
5590 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5591 			if (ctxt->myDoc == NULL) {
5592 			    xmlErrMemory(ctxt, "New Doc failed");
5593 			    return;
5594 			}
5595 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5596 		    }
5597 		    if (ctxt->myDoc->intSubset == NULL)
5598 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5599 					    BAD_CAST "fake", NULL, NULL);
5600 
5601 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5602 			              NULL, NULL, value);
5603 		}
5604 	    } else {
5605 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5606 		if ((URI == NULL) && (literal == NULL)) {
5607 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5608 		}
5609 		if (URI) {
5610 		    xmlURIPtr uri;
5611 
5612 		    uri = xmlParseURI((const char *)URI);
5613 		    if (uri == NULL) {
5614 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5615 				     "Invalid URI: %s\n", URI);
5616 			/*
5617 			 * This really ought to be a well formedness error
5618 			 * but the XML Core WG decided otherwise c.f. issue
5619 			 * E26 of the XML erratas.
5620 			 */
5621 		    } else {
5622 			if (uri->fragment != NULL) {
5623 			    /*
5624 			     * Okay this is foolish to block those but not
5625 			     * invalid URIs.
5626 			     */
5627 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5628 			}
5629 			xmlFreeURI(uri);
5630 		    }
5631 		}
5632 		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5633 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634 				   "Space required before 'NDATA'\n");
5635 		}
5636 		SKIP_BLANKS;
5637 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5638 		    SKIP(5);
5639 		    if (!IS_BLANK_CH(CUR)) {
5640 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5641 				       "Space required after 'NDATA'\n");
5642 		    }
5643 		    SKIP_BLANKS;
5644 		    ndata = xmlParseName(ctxt);
5645 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5646 		        (ctxt->sax->unparsedEntityDecl != NULL))
5647 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5648 				    literal, URI, ndata);
5649 		} else {
5650 		    if ((ctxt->sax != NULL) &&
5651 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5652 			ctxt->sax->entityDecl(ctxt->userData, name,
5653 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5654 				    literal, URI, NULL);
5655 		    /*
5656 		     * For expat compatibility in SAX mode.
5657 		     * assuming the entity repalcement was asked for
5658 		     */
5659 		    if ((ctxt->replaceEntities != 0) &&
5660 			((ctxt->myDoc == NULL) ||
5661 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5662 			if (ctxt->myDoc == NULL) {
5663 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5664 			    if (ctxt->myDoc == NULL) {
5665 			        xmlErrMemory(ctxt, "New Doc failed");
5666 				return;
5667 			    }
5668 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5669 			}
5670 
5671 			if (ctxt->myDoc->intSubset == NULL)
5672 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5673 						BAD_CAST "fake", NULL, NULL);
5674 			xmlSAX2EntityDecl(ctxt, name,
5675 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5676 				          literal, URI, NULL);
5677 		    }
5678 		}
5679 	    }
5680 	}
5681 	if (ctxt->instate == XML_PARSER_EOF)
5682 	    return;
5683 	SKIP_BLANKS;
5684 	if (RAW != '>') {
5685 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5686 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5687 	    xmlHaltParser(ctxt);
5688 	} else {
5689 	    if (input != ctxt->input) {
5690 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5691 	"Entity declaration doesn't start and stop in the same entity\n");
5692 	    }
5693 	    NEXT;
5694 	}
5695 	if (orig != NULL) {
5696 	    /*
5697 	     * Ugly mechanism to save the raw entity value.
5698 	     */
5699 	    xmlEntityPtr cur = NULL;
5700 
5701 	    if (isParameter) {
5702 	        if ((ctxt->sax != NULL) &&
5703 		    (ctxt->sax->getParameterEntity != NULL))
5704 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5705 	    } else {
5706 	        if ((ctxt->sax != NULL) &&
5707 		    (ctxt->sax->getEntity != NULL))
5708 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5709 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5710 		    cur = xmlSAX2GetEntity(ctxt, name);
5711 		}
5712 	    }
5713             if (cur != NULL) {
5714 	        if (cur->orig != NULL)
5715 		    xmlFree(orig);
5716 		else
5717 		    cur->orig = orig;
5718 	    } else
5719 		xmlFree(orig);
5720 	}
5721 	if (value != NULL) xmlFree(value);
5722 	if (URI != NULL) xmlFree(URI);
5723 	if (literal != NULL) xmlFree(literal);
5724     }
5725 }
5726 
5727 /**
5728  * xmlParseDefaultDecl:
5729  * @ctxt:  an XML parser context
5730  * @value:  Receive a possible fixed default value for the attribute
5731  *
5732  * Parse an attribute default declaration
5733  *
5734  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5735  *
5736  * [ VC: Required Attribute ]
5737  * if the default declaration is the keyword #REQUIRED, then the
5738  * attribute must be specified for all elements of the type in the
5739  * attribute-list declaration.
5740  *
5741  * [ VC: Attribute Default Legal ]
5742  * The declared default value must meet the lexical constraints of
5743  * the declared attribute type c.f. xmlValidateAttributeDecl()
5744  *
5745  * [ VC: Fixed Attribute Default ]
5746  * if an attribute has a default value declared with the #FIXED
5747  * keyword, instances of that attribute must match the default value.
5748  *
5749  * [ WFC: No < in Attribute Values ]
5750  * handled in xmlParseAttValue()
5751  *
5752  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5753  *          or XML_ATTRIBUTE_FIXED.
5754  */
5755 
5756 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5757 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5758     int val;
5759     xmlChar *ret;
5760 
5761     *value = NULL;
5762     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5763 	SKIP(9);
5764 	return(XML_ATTRIBUTE_REQUIRED);
5765     }
5766     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5767 	SKIP(8);
5768 	return(XML_ATTRIBUTE_IMPLIED);
5769     }
5770     val = XML_ATTRIBUTE_NONE;
5771     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5772 	SKIP(6);
5773 	val = XML_ATTRIBUTE_FIXED;
5774 	if (!IS_BLANK_CH(CUR)) {
5775 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5776 			   "Space required after '#FIXED'\n");
5777 	}
5778 	SKIP_BLANKS;
5779     }
5780     ret = xmlParseAttValue(ctxt);
5781     ctxt->instate = XML_PARSER_DTD;
5782     if (ret == NULL) {
5783 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5784 		       "Attribute default value declaration error\n");
5785     } else
5786         *value = ret;
5787     return(val);
5788 }
5789 
5790 /**
5791  * xmlParseNotationType:
5792  * @ctxt:  an XML parser context
5793  *
5794  * parse an Notation attribute type.
5795  *
5796  * Note: the leading 'NOTATION' S part has already being parsed...
5797  *
5798  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5799  *
5800  * [ VC: Notation Attributes ]
5801  * Values of this type must match one of the notation names included
5802  * in the declaration; all notation names in the declaration must be declared.
5803  *
5804  * Returns: the notation attribute tree built while parsing
5805  */
5806 
5807 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5808 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5809     const xmlChar *name;
5810     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5811 
5812     if (RAW != '(') {
5813 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5814 	return(NULL);
5815     }
5816     SHRINK;
5817     do {
5818         NEXT;
5819 	SKIP_BLANKS;
5820         name = xmlParseName(ctxt);
5821 	if (name == NULL) {
5822 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5823 			   "Name expected in NOTATION declaration\n");
5824             xmlFreeEnumeration(ret);
5825 	    return(NULL);
5826 	}
5827 	tmp = ret;
5828 	while (tmp != NULL) {
5829 	    if (xmlStrEqual(name, tmp->name)) {
5830 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5831 	  "standalone: attribute notation value token %s duplicated\n",
5832 				 name, NULL);
5833 		if (!xmlDictOwns(ctxt->dict, name))
5834 		    xmlFree((xmlChar *) name);
5835 		break;
5836 	    }
5837 	    tmp = tmp->next;
5838 	}
5839 	if (tmp == NULL) {
5840 	    cur = xmlCreateEnumeration(name);
5841 	    if (cur == NULL) {
5842                 xmlFreeEnumeration(ret);
5843                 return(NULL);
5844             }
5845 	    if (last == NULL) ret = last = cur;
5846 	    else {
5847 		last->next = cur;
5848 		last = cur;
5849 	    }
5850 	}
5851 	SKIP_BLANKS;
5852     } while (RAW == '|');
5853     if (RAW != ')') {
5854 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5855         xmlFreeEnumeration(ret);
5856 	return(NULL);
5857     }
5858     NEXT;
5859     return(ret);
5860 }
5861 
5862 /**
5863  * xmlParseEnumerationType:
5864  * @ctxt:  an XML parser context
5865  *
5866  * parse an Enumeration attribute type.
5867  *
5868  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5869  *
5870  * [ VC: Enumeration ]
5871  * Values of this type must match one of the Nmtoken tokens in
5872  * the declaration
5873  *
5874  * Returns: the enumeration attribute tree built while parsing
5875  */
5876 
5877 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5878 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5879     xmlChar *name;
5880     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5881 
5882     if (RAW != '(') {
5883 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5884 	return(NULL);
5885     }
5886     SHRINK;
5887     do {
5888         NEXT;
5889 	SKIP_BLANKS;
5890         name = xmlParseNmtoken(ctxt);
5891 	if (name == NULL) {
5892 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5893 	    return(ret);
5894 	}
5895 	tmp = ret;
5896 	while (tmp != NULL) {
5897 	    if (xmlStrEqual(name, tmp->name)) {
5898 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5899 	  "standalone: attribute enumeration value token %s duplicated\n",
5900 				 name, NULL);
5901 		if (!xmlDictOwns(ctxt->dict, name))
5902 		    xmlFree(name);
5903 		break;
5904 	    }
5905 	    tmp = tmp->next;
5906 	}
5907 	if (tmp == NULL) {
5908 	    cur = xmlCreateEnumeration(name);
5909 	    if (!xmlDictOwns(ctxt->dict, name))
5910 		xmlFree(name);
5911 	    if (cur == NULL) {
5912                 xmlFreeEnumeration(ret);
5913                 return(NULL);
5914             }
5915 	    if (last == NULL) ret = last = cur;
5916 	    else {
5917 		last->next = cur;
5918 		last = cur;
5919 	    }
5920 	}
5921 	SKIP_BLANKS;
5922     } while (RAW == '|');
5923     if (RAW != ')') {
5924 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5925 	return(ret);
5926     }
5927     NEXT;
5928     return(ret);
5929 }
5930 
5931 /**
5932  * xmlParseEnumeratedType:
5933  * @ctxt:  an XML parser context
5934  * @tree:  the enumeration tree built while parsing
5935  *
5936  * parse an Enumerated attribute type.
5937  *
5938  * [57] EnumeratedType ::= NotationType | Enumeration
5939  *
5940  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5941  *
5942  *
5943  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5944  */
5945 
5946 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5947 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5948     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5949 	SKIP(8);
5950 	if (!IS_BLANK_CH(CUR)) {
5951 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5952 			   "Space required after 'NOTATION'\n");
5953 	    return(0);
5954 	}
5955         SKIP_BLANKS;
5956 	*tree = xmlParseNotationType(ctxt);
5957 	if (*tree == NULL) return(0);
5958 	return(XML_ATTRIBUTE_NOTATION);
5959     }
5960     *tree = xmlParseEnumerationType(ctxt);
5961     if (*tree == NULL) return(0);
5962     return(XML_ATTRIBUTE_ENUMERATION);
5963 }
5964 
5965 /**
5966  * xmlParseAttributeType:
5967  * @ctxt:  an XML parser context
5968  * @tree:  the enumeration tree built while parsing
5969  *
5970  * parse the Attribute list def for an element
5971  *
5972  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5973  *
5974  * [55] StringType ::= 'CDATA'
5975  *
5976  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5977  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5978  *
5979  * Validity constraints for attribute values syntax are checked in
5980  * xmlValidateAttributeValue()
5981  *
5982  * [ VC: ID ]
5983  * Values of type ID must match the Name production. A name must not
5984  * appear more than once in an XML document as a value of this type;
5985  * i.e., ID values must uniquely identify the elements which bear them.
5986  *
5987  * [ VC: One ID per Element Type ]
5988  * No element type may have more than one ID attribute specified.
5989  *
5990  * [ VC: ID Attribute Default ]
5991  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5992  *
5993  * [ VC: IDREF ]
5994  * Values of type IDREF must match the Name production, and values
5995  * of type IDREFS must match Names; each IDREF Name must match the value
5996  * of an ID attribute on some element in the XML document; i.e. IDREF
5997  * values must match the value of some ID attribute.
5998  *
5999  * [ VC: Entity Name ]
6000  * Values of type ENTITY must match the Name production, values
6001  * of type ENTITIES must match Names; each Entity Name must match the
6002  * name of an unparsed entity declared in the DTD.
6003  *
6004  * [ VC: Name Token ]
6005  * Values of type NMTOKEN must match the Nmtoken production; values
6006  * of type NMTOKENS must match Nmtokens.
6007  *
6008  * Returns the attribute type
6009  */
6010 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6011 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6012     SHRINK;
6013     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6014 	SKIP(5);
6015 	return(XML_ATTRIBUTE_CDATA);
6016      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6017 	SKIP(6);
6018 	return(XML_ATTRIBUTE_IDREFS);
6019      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6020 	SKIP(5);
6021 	return(XML_ATTRIBUTE_IDREF);
6022      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6023         SKIP(2);
6024 	return(XML_ATTRIBUTE_ID);
6025      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6026 	SKIP(6);
6027 	return(XML_ATTRIBUTE_ENTITY);
6028      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6029 	SKIP(8);
6030 	return(XML_ATTRIBUTE_ENTITIES);
6031      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6032 	SKIP(8);
6033 	return(XML_ATTRIBUTE_NMTOKENS);
6034      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6035 	SKIP(7);
6036 	return(XML_ATTRIBUTE_NMTOKEN);
6037      }
6038      return(xmlParseEnumeratedType(ctxt, tree));
6039 }
6040 
6041 /**
6042  * xmlParseAttributeListDecl:
6043  * @ctxt:  an XML parser context
6044  *
6045  * : parse the Attribute list def for an element
6046  *
6047  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6048  *
6049  * [53] AttDef ::= S Name S AttType S DefaultDecl
6050  *
6051  */
6052 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6053 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6054     const xmlChar *elemName;
6055     const xmlChar *attrName;
6056     xmlEnumerationPtr tree;
6057 
6058     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6059 	xmlParserInputPtr input = ctxt->input;
6060 
6061 	SKIP(9);
6062 	if (!IS_BLANK_CH(CUR)) {
6063 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6064 		                 "Space required after '<!ATTLIST'\n");
6065 	}
6066         SKIP_BLANKS;
6067         elemName = xmlParseName(ctxt);
6068 	if (elemName == NULL) {
6069 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6070 			   "ATTLIST: no name for Element\n");
6071 	    return;
6072 	}
6073 	SKIP_BLANKS;
6074 	GROW;
6075 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6076 	    const xmlChar *check = CUR_PTR;
6077 	    int type;
6078 	    int def;
6079 	    xmlChar *defaultValue = NULL;
6080 
6081 	    GROW;
6082             tree = NULL;
6083 	    attrName = xmlParseName(ctxt);
6084 	    if (attrName == NULL) {
6085 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6086 			       "ATTLIST: no name for Attribute\n");
6087 		break;
6088 	    }
6089 	    GROW;
6090 	    if (!IS_BLANK_CH(CUR)) {
6091 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6092 		        "Space required after the attribute name\n");
6093 		break;
6094 	    }
6095 	    SKIP_BLANKS;
6096 
6097 	    type = xmlParseAttributeType(ctxt, &tree);
6098 	    if (type <= 0) {
6099 	        break;
6100 	    }
6101 
6102 	    GROW;
6103 	    if (!IS_BLANK_CH(CUR)) {
6104 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6105 			       "Space required after the attribute type\n");
6106 	        if (tree != NULL)
6107 		    xmlFreeEnumeration(tree);
6108 		break;
6109 	    }
6110 	    SKIP_BLANKS;
6111 
6112 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6113 	    if (def <= 0) {
6114                 if (defaultValue != NULL)
6115 		    xmlFree(defaultValue);
6116 	        if (tree != NULL)
6117 		    xmlFreeEnumeration(tree);
6118 	        break;
6119 	    }
6120 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6121 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6122 
6123 	    GROW;
6124             if (RAW != '>') {
6125 		if (!IS_BLANK_CH(CUR)) {
6126 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6127 			"Space required after the attribute default value\n");
6128 		    if (defaultValue != NULL)
6129 			xmlFree(defaultValue);
6130 		    if (tree != NULL)
6131 			xmlFreeEnumeration(tree);
6132 		    break;
6133 		}
6134 		SKIP_BLANKS;
6135 	    }
6136 	    if (check == CUR_PTR) {
6137 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6138 		            "in xmlParseAttributeListDecl\n");
6139 		if (defaultValue != NULL)
6140 		    xmlFree(defaultValue);
6141 	        if (tree != NULL)
6142 		    xmlFreeEnumeration(tree);
6143 		break;
6144 	    }
6145 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6146 		(ctxt->sax->attributeDecl != NULL))
6147 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6148 	                        type, def, defaultValue, tree);
6149 	    else if (tree != NULL)
6150 		xmlFreeEnumeration(tree);
6151 
6152 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6153 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6154 		(def != XML_ATTRIBUTE_REQUIRED)) {
6155 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6156 	    }
6157 	    if (ctxt->sax2) {
6158 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6159 	    }
6160 	    if (defaultValue != NULL)
6161 	        xmlFree(defaultValue);
6162 	    GROW;
6163 	}
6164 	if (RAW == '>') {
6165 	    if (input != ctxt->input) {
6166 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6167     "Attribute list declaration doesn't start and stop in the same entity\n",
6168                                  NULL, NULL);
6169 	    }
6170 	    NEXT;
6171 	}
6172     }
6173 }
6174 
6175 /**
6176  * xmlParseElementMixedContentDecl:
6177  * @ctxt:  an XML parser context
6178  * @inputchk:  the input used for the current entity, needed for boundary checks
6179  *
6180  * parse the declaration for a Mixed Element content
6181  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6182  *
6183  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6184  *                '(' S? '#PCDATA' S? ')'
6185  *
6186  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6187  *
6188  * [ VC: No Duplicate Types ]
6189  * The same name must not appear more than once in a single
6190  * mixed-content declaration.
6191  *
6192  * returns: the list of the xmlElementContentPtr describing the element choices
6193  */
6194 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6195 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6196     xmlElementContentPtr ret = NULL, cur = NULL, n;
6197     const xmlChar *elem = NULL;
6198 
6199     GROW;
6200     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6201 	SKIP(7);
6202 	SKIP_BLANKS;
6203 	SHRINK;
6204 	if (RAW == ')') {
6205 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6206 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6207 "Element content declaration doesn't start and stop in the same entity\n",
6208                                  NULL, NULL);
6209 	    }
6210 	    NEXT;
6211 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6212 	    if (ret == NULL)
6213 	        return(NULL);
6214 	    if (RAW == '*') {
6215 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6216 		NEXT;
6217 	    }
6218 	    return(ret);
6219 	}
6220 	if ((RAW == '(') || (RAW == '|')) {
6221 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6222 	    if (ret == NULL) return(NULL);
6223 	}
6224 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6225 	    NEXT;
6226 	    if (elem == NULL) {
6227 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6228 		if (ret == NULL) return(NULL);
6229 		ret->c1 = cur;
6230 		if (cur != NULL)
6231 		    cur->parent = ret;
6232 		cur = ret;
6233 	    } else {
6234 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6235 		if (n == NULL) return(NULL);
6236 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6237 		if (n->c1 != NULL)
6238 		    n->c1->parent = n;
6239 	        cur->c2 = n;
6240 		if (n != NULL)
6241 		    n->parent = cur;
6242 		cur = n;
6243 	    }
6244 	    SKIP_BLANKS;
6245 	    elem = xmlParseName(ctxt);
6246 	    if (elem == NULL) {
6247 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6248 			"xmlParseElementMixedContentDecl : Name expected\n");
6249 		xmlFreeDocElementContent(ctxt->myDoc, cur);
6250 		return(NULL);
6251 	    }
6252 	    SKIP_BLANKS;
6253 	    GROW;
6254 	}
6255 	if ((RAW == ')') && (NXT(1) == '*')) {
6256 	    if (elem != NULL) {
6257 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6258 		                               XML_ELEMENT_CONTENT_ELEMENT);
6259 		if (cur->c2 != NULL)
6260 		    cur->c2->parent = cur;
6261             }
6262             if (ret != NULL)
6263                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6264 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6265 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6266 "Element content declaration doesn't start and stop in the same entity\n",
6267 				 NULL, NULL);
6268 	    }
6269 	    SKIP(2);
6270 	} else {
6271 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6272 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6273 	    return(NULL);
6274 	}
6275 
6276     } else {
6277 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6278     }
6279     return(ret);
6280 }
6281 
6282 /**
6283  * xmlParseElementChildrenContentDeclPriv:
6284  * @ctxt:  an XML parser context
6285  * @inputchk:  the input used for the current entity, needed for boundary checks
6286  * @depth: the level of recursion
6287  *
6288  * parse the declaration for a Mixed Element content
6289  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6290  *
6291  *
6292  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6293  *
6294  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6295  *
6296  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6297  *
6298  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6299  *
6300  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6301  * TODO Parameter-entity replacement text must be properly nested
6302  *	with parenthesized groups. That is to say, if either of the
6303  *	opening or closing parentheses in a choice, seq, or Mixed
6304  *	construct is contained in the replacement text for a parameter
6305  *	entity, both must be contained in the same replacement text. For
6306  *	interoperability, if a parameter-entity reference appears in a
6307  *	choice, seq, or Mixed construct, its replacement text should not
6308  *	be empty, and neither the first nor last non-blank character of
6309  *	the replacement text should be a connector (| or ,).
6310  *
6311  * Returns the tree of xmlElementContentPtr describing the element
6312  *          hierarchy.
6313  */
6314 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6315 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6316                                        int depth) {
6317     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6318     const xmlChar *elem;
6319     xmlChar type = 0;
6320 
6321     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6322         (depth >  2048)) {
6323         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6324 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6325                           depth);
6326 	return(NULL);
6327     }
6328     SKIP_BLANKS;
6329     GROW;
6330     if (RAW == '(') {
6331 	int inputid = ctxt->input->id;
6332 
6333         /* Recurse on first child */
6334 	NEXT;
6335 	SKIP_BLANKS;
6336         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6337                                                            depth + 1);
6338 	SKIP_BLANKS;
6339 	GROW;
6340     } else {
6341 	elem = xmlParseName(ctxt);
6342 	if (elem == NULL) {
6343 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6344 	    return(NULL);
6345 	}
6346         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6347 	if (cur == NULL) {
6348 	    xmlErrMemory(ctxt, NULL);
6349 	    return(NULL);
6350 	}
6351 	GROW;
6352 	if (RAW == '?') {
6353 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6354 	    NEXT;
6355 	} else if (RAW == '*') {
6356 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6357 	    NEXT;
6358 	} else if (RAW == '+') {
6359 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6360 	    NEXT;
6361 	} else {
6362 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6363 	}
6364 	GROW;
6365     }
6366     SKIP_BLANKS;
6367     SHRINK;
6368     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6369         /*
6370 	 * Each loop we parse one separator and one element.
6371 	 */
6372         if (RAW == ',') {
6373 	    if (type == 0) type = CUR;
6374 
6375 	    /*
6376 	     * Detect "Name | Name , Name" error
6377 	     */
6378 	    else if (type != CUR) {
6379 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381 		                  type);
6382 		if ((last != NULL) && (last != ret))
6383 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6384 		if (ret != NULL)
6385 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6386 		return(NULL);
6387 	    }
6388 	    NEXT;
6389 
6390 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6391 	    if (op == NULL) {
6392 		if ((last != NULL) && (last != ret))
6393 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6394 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6395 		return(NULL);
6396 	    }
6397 	    if (last == NULL) {
6398 		op->c1 = ret;
6399 		if (ret != NULL)
6400 		    ret->parent = op;
6401 		ret = cur = op;
6402 	    } else {
6403 	        cur->c2 = op;
6404 		if (op != NULL)
6405 		    op->parent = cur;
6406 		op->c1 = last;
6407 		if (last != NULL)
6408 		    last->parent = op;
6409 		cur =op;
6410 		last = NULL;
6411 	    }
6412 	} else if (RAW == '|') {
6413 	    if (type == 0) type = CUR;
6414 
6415 	    /*
6416 	     * Detect "Name , Name | Name" error
6417 	     */
6418 	    else if (type != CUR) {
6419 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6420 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6421 				  type);
6422 		if ((last != NULL) && (last != ret))
6423 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6424 		if (ret != NULL)
6425 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6426 		return(NULL);
6427 	    }
6428 	    NEXT;
6429 
6430 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6431 	    if (op == NULL) {
6432 		if ((last != NULL) && (last != ret))
6433 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6434 		if (ret != NULL)
6435 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6436 		return(NULL);
6437 	    }
6438 	    if (last == NULL) {
6439 		op->c1 = ret;
6440 		if (ret != NULL)
6441 		    ret->parent = op;
6442 		ret = cur = op;
6443 	    } else {
6444 	        cur->c2 = op;
6445 		if (op != NULL)
6446 		    op->parent = cur;
6447 		op->c1 = last;
6448 		if (last != NULL)
6449 		    last->parent = op;
6450 		cur =op;
6451 		last = NULL;
6452 	    }
6453 	} else {
6454 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6455 	    if ((last != NULL) && (last != ret))
6456 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6457 	    if (ret != NULL)
6458 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6459 	    return(NULL);
6460 	}
6461 	GROW;
6462 	SKIP_BLANKS;
6463 	GROW;
6464 	if (RAW == '(') {
6465 	    int inputid = ctxt->input->id;
6466 	    /* Recurse on second child */
6467 	    NEXT;
6468 	    SKIP_BLANKS;
6469 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6470                                                           depth + 1);
6471 	    SKIP_BLANKS;
6472 	} else {
6473 	    elem = xmlParseName(ctxt);
6474 	    if (elem == NULL) {
6475 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6476 		if (ret != NULL)
6477 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6478 		return(NULL);
6479 	    }
6480 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6481 	    if (last == NULL) {
6482 		if (ret != NULL)
6483 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6484 		return(NULL);
6485 	    }
6486 	    if (RAW == '?') {
6487 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6488 		NEXT;
6489 	    } else if (RAW == '*') {
6490 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6491 		NEXT;
6492 	    } else if (RAW == '+') {
6493 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6494 		NEXT;
6495 	    } else {
6496 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6497 	    }
6498 	}
6499 	SKIP_BLANKS;
6500 	GROW;
6501     }
6502     if ((cur != NULL) && (last != NULL)) {
6503         cur->c2 = last;
6504 	if (last != NULL)
6505 	    last->parent = cur;
6506     }
6507     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6508 	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6509 "Element content declaration doesn't start and stop in the same entity\n",
6510 			 NULL, NULL);
6511     }
6512     NEXT;
6513     if (RAW == '?') {
6514 	if (ret != NULL) {
6515 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6516 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6517 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6518 	    else
6519 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6520 	}
6521 	NEXT;
6522     } else if (RAW == '*') {
6523 	if (ret != NULL) {
6524 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6525 	    cur = ret;
6526 	    /*
6527 	     * Some normalization:
6528 	     * (a | b* | c?)* == (a | b | c)*
6529 	     */
6530 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6531 		if ((cur->c1 != NULL) &&
6532 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6533 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6534 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6535 		if ((cur->c2 != NULL) &&
6536 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6537 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6538 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6539 		cur = cur->c2;
6540 	    }
6541 	}
6542 	NEXT;
6543     } else if (RAW == '+') {
6544 	if (ret != NULL) {
6545 	    int found = 0;
6546 
6547 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6549 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6550 	    else
6551 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6552 	    /*
6553 	     * Some normalization:
6554 	     * (a | b*)+ == (a | b)*
6555 	     * (a | b?)+ == (a | b)*
6556 	     */
6557 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6558 		if ((cur->c1 != NULL) &&
6559 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6560 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6561 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6562 		    found = 1;
6563 		}
6564 		if ((cur->c2 != NULL) &&
6565 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6566 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6567 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6568 		    found = 1;
6569 		}
6570 		cur = cur->c2;
6571 	    }
6572 	    if (found)
6573 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6574 	}
6575 	NEXT;
6576     }
6577     return(ret);
6578 }
6579 
6580 /**
6581  * xmlParseElementChildrenContentDecl:
6582  * @ctxt:  an XML parser context
6583  * @inputchk:  the input used for the current entity, needed for boundary checks
6584  *
6585  * parse the declaration for a Mixed Element content
6586  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6587  *
6588  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6589  *
6590  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6591  *
6592  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6593  *
6594  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6595  *
6596  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6597  * TODO Parameter-entity replacement text must be properly nested
6598  *	with parenthesized groups. That is to say, if either of the
6599  *	opening or closing parentheses in a choice, seq, or Mixed
6600  *	construct is contained in the replacement text for a parameter
6601  *	entity, both must be contained in the same replacement text. For
6602  *	interoperability, if a parameter-entity reference appears in a
6603  *	choice, seq, or Mixed construct, its replacement text should not
6604  *	be empty, and neither the first nor last non-blank character of
6605  *	the replacement text should be a connector (| or ,).
6606  *
6607  * Returns the tree of xmlElementContentPtr describing the element
6608  *          hierarchy.
6609  */
6610 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6611 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6612     /* stub left for API/ABI compat */
6613     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6614 }
6615 
6616 /**
6617  * xmlParseElementContentDecl:
6618  * @ctxt:  an XML parser context
6619  * @name:  the name of the element being defined.
6620  * @result:  the Element Content pointer will be stored here if any
6621  *
6622  * parse the declaration for an Element content either Mixed or Children,
6623  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6624  *
6625  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6626  *
6627  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6628  */
6629 
6630 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6631 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6632                            xmlElementContentPtr *result) {
6633 
6634     xmlElementContentPtr tree = NULL;
6635     int inputid = ctxt->input->id;
6636     int res;
6637 
6638     *result = NULL;
6639 
6640     if (RAW != '(') {
6641 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6643 	return(-1);
6644     }
6645     NEXT;
6646     GROW;
6647     if (ctxt->instate == XML_PARSER_EOF)
6648         return(-1);
6649     SKIP_BLANKS;
6650     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6651         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6652 	res = XML_ELEMENT_TYPE_MIXED;
6653     } else {
6654         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6655 	res = XML_ELEMENT_TYPE_ELEMENT;
6656     }
6657     SKIP_BLANKS;
6658     *result = tree;
6659     return(res);
6660 }
6661 
6662 /**
6663  * xmlParseElementDecl:
6664  * @ctxt:  an XML parser context
6665  *
6666  * parse an Element declaration.
6667  *
6668  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6669  *
6670  * [ VC: Unique Element Type Declaration ]
6671  * No element type may be declared more than once
6672  *
6673  * Returns the type of the element, or -1 in case of error
6674  */
6675 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6676 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6677     const xmlChar *name;
6678     int ret = -1;
6679     xmlElementContentPtr content  = NULL;
6680 
6681     /* GROW; done in the caller */
6682     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6683 	xmlParserInputPtr input = ctxt->input;
6684 
6685 	SKIP(9);
6686 	if (!IS_BLANK_CH(CUR)) {
6687 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 		           "Space required after 'ELEMENT'\n");
6689 	}
6690         SKIP_BLANKS;
6691         name = xmlParseName(ctxt);
6692 	if (name == NULL) {
6693 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6694 			   "xmlParseElementDecl: no name for Element\n");
6695 	    return(-1);
6696 	}
6697 	while ((RAW == 0) && (ctxt->inputNr > 1))
6698 	    xmlPopInput(ctxt);
6699 	if (!IS_BLANK_CH(CUR)) {
6700 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6701 			   "Space required after the element name\n");
6702 	}
6703         SKIP_BLANKS;
6704 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6705 	    SKIP(5);
6706 	    /*
6707 	     * Element must always be empty.
6708 	     */
6709 	    ret = XML_ELEMENT_TYPE_EMPTY;
6710 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6711 	           (NXT(2) == 'Y')) {
6712 	    SKIP(3);
6713 	    /*
6714 	     * Element is a generic container.
6715 	     */
6716 	    ret = XML_ELEMENT_TYPE_ANY;
6717 	} else if (RAW == '(') {
6718 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6719 	} else {
6720 	    /*
6721 	     * [ WFC: PEs in Internal Subset ] error handling.
6722 	     */
6723 	    if ((RAW == '%') && (ctxt->external == 0) &&
6724 	        (ctxt->inputNr == 1)) {
6725 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6726 	  "PEReference: forbidden within markup decl in internal subset\n");
6727 	    } else {
6728 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6729 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6730             }
6731 	    return(-1);
6732 	}
6733 
6734 	SKIP_BLANKS;
6735 	/*
6736 	 * Pop-up of finished entities.
6737 	 */
6738 	while ((RAW == 0) && (ctxt->inputNr > 1))
6739 	    xmlPopInput(ctxt);
6740 	SKIP_BLANKS;
6741 
6742 	if (RAW != '>') {
6743 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6744 	    if (content != NULL) {
6745 		xmlFreeDocElementContent(ctxt->myDoc, content);
6746 	    }
6747 	} else {
6748 	    if (input != ctxt->input) {
6749 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750     "Element declaration doesn't start and stop in the same entity\n");
6751 	    }
6752 
6753 	    NEXT;
6754 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6755 		(ctxt->sax->elementDecl != NULL)) {
6756 		if (content != NULL)
6757 		    content->parent = NULL;
6758 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6759 		                       content);
6760 		if ((content != NULL) && (content->parent == NULL)) {
6761 		    /*
6762 		     * this is a trick: if xmlAddElementDecl is called,
6763 		     * instead of copying the full tree it is plugged directly
6764 		     * if called from the parser. Avoid duplicating the
6765 		     * interfaces or change the API/ABI
6766 		     */
6767 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6768 		}
6769 	    } else if (content != NULL) {
6770 		xmlFreeDocElementContent(ctxt->myDoc, content);
6771 	    }
6772 	}
6773     }
6774     return(ret);
6775 }
6776 
6777 /**
6778  * xmlParseConditionalSections
6779  * @ctxt:  an XML parser context
6780  *
6781  * [61] conditionalSect ::= includeSect | ignoreSect
6782  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6783  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6784  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6785  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6786  */
6787 
6788 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6789 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6790     int id = ctxt->input->id;
6791 
6792     SKIP(3);
6793     SKIP_BLANKS;
6794     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6795 	SKIP(7);
6796 	SKIP_BLANKS;
6797 	if (RAW != '[') {
6798 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6799 	    xmlHaltParser(ctxt);
6800 	    return;
6801 	} else {
6802 	    if (ctxt->input->id != id) {
6803 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804 	    "All markup of the conditional section is not in the same entity\n",
6805 				     NULL, NULL);
6806 	    }
6807 	    NEXT;
6808 	}
6809 	if (xmlParserDebugEntities) {
6810 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6811 		xmlGenericError(xmlGenericErrorContext,
6812 			"%s(%d): ", ctxt->input->filename,
6813 			ctxt->input->line);
6814 	    xmlGenericError(xmlGenericErrorContext,
6815 		    "Entering INCLUDE Conditional Section\n");
6816 	}
6817 
6818 	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6819 	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6820 	    const xmlChar *check = CUR_PTR;
6821 	    unsigned int cons = ctxt->input->consumed;
6822 
6823 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 		xmlParseConditionalSections(ctxt);
6825 	    } else if (IS_BLANK_CH(CUR)) {
6826 		NEXT;
6827 	    } else if (RAW == '%') {
6828 		xmlParsePEReference(ctxt);
6829 	    } else
6830 		xmlParseMarkupDecl(ctxt);
6831 
6832 	    /*
6833 	     * Pop-up of finished entities.
6834 	     */
6835 	    while ((RAW == 0) && (ctxt->inputNr > 1))
6836 		xmlPopInput(ctxt);
6837 
6838 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6839 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6840 		break;
6841 	    }
6842 	}
6843 	if (xmlParserDebugEntities) {
6844 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6845 		xmlGenericError(xmlGenericErrorContext,
6846 			"%s(%d): ", ctxt->input->filename,
6847 			ctxt->input->line);
6848 	    xmlGenericError(xmlGenericErrorContext,
6849 		    "Leaving INCLUDE Conditional Section\n");
6850 	}
6851 
6852     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6853 	int state;
6854 	xmlParserInputState instate;
6855 	int depth = 0;
6856 
6857 	SKIP(6);
6858 	SKIP_BLANKS;
6859 	if (RAW != '[') {
6860 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6861 	    xmlHaltParser(ctxt);
6862 	    return;
6863 	} else {
6864 	    if (ctxt->input->id != id) {
6865 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6866 	    "All markup of the conditional section is not in the same entity\n",
6867 				     NULL, NULL);
6868 	    }
6869 	    NEXT;
6870 	}
6871 	if (xmlParserDebugEntities) {
6872 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6873 		xmlGenericError(xmlGenericErrorContext,
6874 			"%s(%d): ", ctxt->input->filename,
6875 			ctxt->input->line);
6876 	    xmlGenericError(xmlGenericErrorContext,
6877 		    "Entering IGNORE Conditional Section\n");
6878 	}
6879 
6880 	/*
6881 	 * Parse up to the end of the conditional section
6882 	 * But disable SAX event generating DTD building in the meantime
6883 	 */
6884 	state = ctxt->disableSAX;
6885 	instate = ctxt->instate;
6886 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6887 	ctxt->instate = XML_PARSER_IGNORE;
6888 
6889 	while (((depth >= 0) && (RAW != 0)) &&
6890                (ctxt->instate != XML_PARSER_EOF)) {
6891 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6892 	    depth++;
6893 	    SKIP(3);
6894 	    continue;
6895 	  }
6896 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6897 	    if (--depth >= 0) SKIP(3);
6898 	    continue;
6899 	  }
6900 	  NEXT;
6901 	  continue;
6902 	}
6903 
6904 	ctxt->disableSAX = state;
6905 	ctxt->instate = instate;
6906 
6907 	if (xmlParserDebugEntities) {
6908 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6909 		xmlGenericError(xmlGenericErrorContext,
6910 			"%s(%d): ", ctxt->input->filename,
6911 			ctxt->input->line);
6912 	    xmlGenericError(xmlGenericErrorContext,
6913 		    "Leaving IGNORE Conditional Section\n");
6914 	}
6915 
6916     } else {
6917 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6918 	xmlHaltParser(ctxt);
6919 	return;
6920     }
6921 
6922     if (RAW == 0)
6923         SHRINK;
6924 
6925     if (RAW == 0) {
6926 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6927     } else {
6928 	if (ctxt->input->id != id) {
6929 	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6930 	"All markup of the conditional section is not in the same entity\n",
6931 				 NULL, NULL);
6932 	}
6933 	if ((ctxt-> instate != XML_PARSER_EOF) &&
6934 	    ((ctxt->input->cur + 3) <= ctxt->input->end))
6935 	    SKIP(3);
6936     }
6937 }
6938 
6939 /**
6940  * xmlParseMarkupDecl:
6941  * @ctxt:  an XML parser context
6942  *
6943  * parse Markup declarations
6944  *
6945  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6946  *                     NotationDecl | PI | Comment
6947  *
6948  * [ VC: Proper Declaration/PE Nesting ]
6949  * Parameter-entity replacement text must be properly nested with
6950  * markup declarations. That is to say, if either the first character
6951  * or the last character of a markup declaration (markupdecl above) is
6952  * contained in the replacement text for a parameter-entity reference,
6953  * both must be contained in the same replacement text.
6954  *
6955  * [ WFC: PEs in Internal Subset ]
6956  * In the internal DTD subset, parameter-entity references can occur
6957  * only where markup declarations can occur, not within markup declarations.
6958  * (This does not apply to references that occur in external parameter
6959  * entities or to the external subset.)
6960  */
6961 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6962 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6963     GROW;
6964     if (CUR == '<') {
6965         if (NXT(1) == '!') {
6966 	    switch (NXT(2)) {
6967 	        case 'E':
6968 		    if (NXT(3) == 'L')
6969 			xmlParseElementDecl(ctxt);
6970 		    else if (NXT(3) == 'N')
6971 			xmlParseEntityDecl(ctxt);
6972 		    break;
6973 	        case 'A':
6974 		    xmlParseAttributeListDecl(ctxt);
6975 		    break;
6976 	        case 'N':
6977 		    xmlParseNotationDecl(ctxt);
6978 		    break;
6979 	        case '-':
6980 		    xmlParseComment(ctxt);
6981 		    break;
6982 		default:
6983 		    /* there is an error but it will be detected later */
6984 		    break;
6985 	    }
6986 	} else if (NXT(1) == '?') {
6987 	    xmlParsePI(ctxt);
6988 	}
6989     }
6990 
6991     /*
6992      * detect requirement to exit there and act accordingly
6993      * and avoid having instate overriden later on
6994      */
6995     if (ctxt->instate == XML_PARSER_EOF)
6996         return;
6997 
6998     /*
6999      * This is only for internal subset. On external entities,
7000      * the replacement is done before parsing stage
7001      */
7002     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7003 	xmlParsePEReference(ctxt);
7004 
7005     /*
7006      * Conditional sections are allowed from entities included
7007      * by PE References in the internal subset.
7008      */
7009     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7010         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7011 	    xmlParseConditionalSections(ctxt);
7012 	}
7013     }
7014 
7015     ctxt->instate = XML_PARSER_DTD;
7016 }
7017 
7018 /**
7019  * xmlParseTextDecl:
7020  * @ctxt:  an XML parser context
7021  *
7022  * parse an XML declaration header for external entities
7023  *
7024  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7025  */
7026 
7027 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7028 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7029     xmlChar *version;
7030     const xmlChar *encoding;
7031 
7032     /*
7033      * We know that '<?xml' is here.
7034      */
7035     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7036 	SKIP(5);
7037     } else {
7038 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7039 	return;
7040     }
7041 
7042     if (!IS_BLANK_CH(CUR)) {
7043 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7044 		       "Space needed after '<?xml'\n");
7045     }
7046     SKIP_BLANKS;
7047 
7048     /*
7049      * We may have the VersionInfo here.
7050      */
7051     version = xmlParseVersionInfo(ctxt);
7052     if (version == NULL)
7053 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
7054     else {
7055 	if (!IS_BLANK_CH(CUR)) {
7056 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7057 		           "Space needed here\n");
7058 	}
7059     }
7060     ctxt->input->version = version;
7061 
7062     /*
7063      * We must have the encoding declaration
7064      */
7065     encoding = xmlParseEncodingDecl(ctxt);
7066     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7067 	/*
7068 	 * The XML REC instructs us to stop parsing right here
7069 	 */
7070         return;
7071     }
7072     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7073 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7074 		       "Missing encoding in text declaration\n");
7075     }
7076 
7077     SKIP_BLANKS;
7078     if ((RAW == '?') && (NXT(1) == '>')) {
7079         SKIP(2);
7080     } else if (RAW == '>') {
7081         /* Deprecated old WD ... */
7082 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7083 	NEXT;
7084     } else {
7085 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7086 	MOVETO_ENDTAG(CUR_PTR);
7087 	NEXT;
7088     }
7089 }
7090 
7091 /**
7092  * xmlParseExternalSubset:
7093  * @ctxt:  an XML parser context
7094  * @ExternalID: the external identifier
7095  * @SystemID: the system identifier (or URL)
7096  *
7097  * parse Markup declarations from an external subset
7098  *
7099  * [30] extSubset ::= textDecl? extSubsetDecl
7100  *
7101  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7102  */
7103 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7104 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7105                        const xmlChar *SystemID) {
7106     xmlDetectSAX2(ctxt);
7107     GROW;
7108 
7109     if ((ctxt->encoding == NULL) &&
7110         (ctxt->input->end - ctxt->input->cur >= 4)) {
7111         xmlChar start[4];
7112 	xmlCharEncoding enc;
7113 
7114 	start[0] = RAW;
7115 	start[1] = NXT(1);
7116 	start[2] = NXT(2);
7117 	start[3] = NXT(3);
7118 	enc = xmlDetectCharEncoding(start, 4);
7119 	if (enc != XML_CHAR_ENCODING_NONE)
7120 	    xmlSwitchEncoding(ctxt, enc);
7121     }
7122 
7123     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7124 	xmlParseTextDecl(ctxt);
7125 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7126 	    /*
7127 	     * The XML REC instructs us to stop parsing right here
7128 	     */
7129 	    xmlHaltParser(ctxt);
7130 	    return;
7131 	}
7132     }
7133     if (ctxt->myDoc == NULL) {
7134         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7135 	if (ctxt->myDoc == NULL) {
7136 	    xmlErrMemory(ctxt, "New Doc failed");
7137 	    return;
7138 	}
7139 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7140     }
7141     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7142         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7143 
7144     ctxt->instate = XML_PARSER_DTD;
7145     ctxt->external = 1;
7146     while (((RAW == '<') && (NXT(1) == '?')) ||
7147            ((RAW == '<') && (NXT(1) == '!')) ||
7148 	   (RAW == '%') || IS_BLANK_CH(CUR)) {
7149 	const xmlChar *check = CUR_PTR;
7150 	unsigned int cons = ctxt->input->consumed;
7151 
7152 	GROW;
7153         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7154 	    xmlParseConditionalSections(ctxt);
7155 	} else if (IS_BLANK_CH(CUR)) {
7156 	    NEXT;
7157 	} else if (RAW == '%') {
7158             xmlParsePEReference(ctxt);
7159 	} else
7160 	    xmlParseMarkupDecl(ctxt);
7161 
7162 	/*
7163 	 * Pop-up of finished entities.
7164 	 */
7165 	while ((RAW == 0) && (ctxt->inputNr > 1))
7166 	    xmlPopInput(ctxt);
7167 
7168 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7169 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7170 	    break;
7171 	}
7172     }
7173 
7174     if (RAW != 0) {
7175 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7176     }
7177 
7178 }
7179 
7180 /**
7181  * xmlParseReference:
7182  * @ctxt:  an XML parser context
7183  *
7184  * parse and handle entity references in content, depending on the SAX
7185  * interface, this may end-up in a call to character() if this is a
7186  * CharRef, a predefined entity, if there is no reference() callback.
7187  * or if the parser was asked to switch to that mode.
7188  *
7189  * [67] Reference ::= EntityRef | CharRef
7190  */
7191 void
xmlParseReference(xmlParserCtxtPtr ctxt)7192 xmlParseReference(xmlParserCtxtPtr ctxt) {
7193     xmlEntityPtr ent;
7194     xmlChar *val;
7195     int was_checked;
7196     xmlNodePtr list = NULL;
7197     xmlParserErrors ret = XML_ERR_OK;
7198 
7199 
7200     if (RAW != '&')
7201         return;
7202 
7203     /*
7204      * Simple case of a CharRef
7205      */
7206     if (NXT(1) == '#') {
7207 	int i = 0;
7208 	xmlChar out[10];
7209 	int hex = NXT(2);
7210 	int value = xmlParseCharRef(ctxt);
7211 
7212 	if (value == 0)
7213 	    return;
7214 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7215 	    /*
7216 	     * So we are using non-UTF-8 buffers
7217 	     * Check that the char fit on 8bits, if not
7218 	     * generate a CharRef.
7219 	     */
7220 	    if (value <= 0xFF) {
7221 		out[0] = value;
7222 		out[1] = 0;
7223 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7224 		    (!ctxt->disableSAX))
7225 		    ctxt->sax->characters(ctxt->userData, out, 1);
7226 	    } else {
7227 		if ((hex == 'x') || (hex == 'X'))
7228 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7229 		else
7230 		    snprintf((char *)out, sizeof(out), "#%d", value);
7231 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7232 		    (!ctxt->disableSAX))
7233 		    ctxt->sax->reference(ctxt->userData, out);
7234 	    }
7235 	} else {
7236 	    /*
7237 	     * Just encode the value in UTF-8
7238 	     */
7239 	    COPY_BUF(0 ,out, i, value);
7240 	    out[i] = 0;
7241 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7242 		(!ctxt->disableSAX))
7243 		ctxt->sax->characters(ctxt->userData, out, i);
7244 	}
7245 	return;
7246     }
7247 
7248     /*
7249      * We are seeing an entity reference
7250      */
7251     ent = xmlParseEntityRef(ctxt);
7252     if (ent == NULL) return;
7253     if (!ctxt->wellFormed)
7254 	return;
7255     was_checked = ent->checked;
7256 
7257     /* special case of predefined entities */
7258     if ((ent->name == NULL) ||
7259         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7260 	val = ent->content;
7261 	if (val == NULL) return;
7262 	/*
7263 	 * inline the entity.
7264 	 */
7265 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7266 	    (!ctxt->disableSAX))
7267 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7268 	return;
7269     }
7270 
7271     /*
7272      * The first reference to the entity trigger a parsing phase
7273      * where the ent->children is filled with the result from
7274      * the parsing.
7275      * Note: external parsed entities will not be loaded, it is not
7276      * required for a non-validating parser, unless the parsing option
7277      * of validating, or substituting entities were given. Doing so is
7278      * far more secure as the parser will only process data coming from
7279      * the document entity by default.
7280      */
7281     if (((ent->checked == 0) ||
7282          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7283         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7284          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7285 	unsigned long oldnbent = ctxt->nbentities;
7286 
7287 	/*
7288 	 * This is a bit hackish but this seems the best
7289 	 * way to make sure both SAX and DOM entity support
7290 	 * behaves okay.
7291 	 */
7292 	void *user_data;
7293 	if (ctxt->userData == ctxt)
7294 	    user_data = NULL;
7295 	else
7296 	    user_data = ctxt->userData;
7297 
7298 	/*
7299 	 * Check that this entity is well formed
7300 	 * 4.3.2: An internal general parsed entity is well-formed
7301 	 * if its replacement text matches the production labeled
7302 	 * content.
7303 	 */
7304 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7305 	    ctxt->depth++;
7306 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7307 	                                              user_data, &list);
7308 	    ctxt->depth--;
7309 
7310 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7311 	    ctxt->depth++;
7312 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7313 	                                   user_data, ctxt->depth, ent->URI,
7314 					   ent->ExternalID, &list);
7315 	    ctxt->depth--;
7316 	} else {
7317 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7318 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7319 			 "invalid entity type found\n", NULL);
7320 	}
7321 
7322 	/*
7323 	 * Store the number of entities needing parsing for this entity
7324 	 * content and do checkings
7325 	 */
7326 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7327 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7328 	    ent->checked |= 1;
7329 	if (ret == XML_ERR_ENTITY_LOOP) {
7330 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7331 	    xmlFreeNodeList(list);
7332 	    return;
7333 	}
7334 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7335 	    xmlFreeNodeList(list);
7336 	    return;
7337 	}
7338 
7339 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7340 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7341 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7342 		(ent->children == NULL)) {
7343 		ent->children = list;
7344 		if (ctxt->replaceEntities) {
7345 		    /*
7346 		     * Prune it directly in the generated document
7347 		     * except for single text nodes.
7348 		     */
7349 		    if (((list->type == XML_TEXT_NODE) &&
7350 			 (list->next == NULL)) ||
7351 			(ctxt->parseMode == XML_PARSE_READER)) {
7352 			list->parent = (xmlNodePtr) ent;
7353 			list = NULL;
7354 			ent->owner = 1;
7355 		    } else {
7356 			ent->owner = 0;
7357 			while (list != NULL) {
7358 			    list->parent = (xmlNodePtr) ctxt->node;
7359 			    list->doc = ctxt->myDoc;
7360 			    if (list->next == NULL)
7361 				ent->last = list;
7362 			    list = list->next;
7363 			}
7364 			list = ent->children;
7365 #ifdef LIBXML_LEGACY_ENABLED
7366 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7367 			  xmlAddEntityReference(ent, list, NULL);
7368 #endif /* LIBXML_LEGACY_ENABLED */
7369 		    }
7370 		} else {
7371 		    ent->owner = 1;
7372 		    while (list != NULL) {
7373 			list->parent = (xmlNodePtr) ent;
7374 			xmlSetTreeDoc(list, ent->doc);
7375 			if (list->next == NULL)
7376 			    ent->last = list;
7377 			list = list->next;
7378 		    }
7379 		}
7380 	    } else {
7381 		xmlFreeNodeList(list);
7382 		list = NULL;
7383 	    }
7384 	} else if ((ret != XML_ERR_OK) &&
7385 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7386 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7387 		     "Entity '%s' failed to parse\n", ent->name);
7388 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7389 	} else if (list != NULL) {
7390 	    xmlFreeNodeList(list);
7391 	    list = NULL;
7392 	}
7393 	if (ent->checked == 0)
7394 	    ent->checked = 2;
7395     } else if (ent->checked != 1) {
7396 	ctxt->nbentities += ent->checked / 2;
7397     }
7398 
7399     /*
7400      * Now that the entity content has been gathered
7401      * provide it to the application, this can take different forms based
7402      * on the parsing modes.
7403      */
7404     if (ent->children == NULL) {
7405 	/*
7406 	 * Probably running in SAX mode and the callbacks don't
7407 	 * build the entity content. So unless we already went
7408 	 * though parsing for first checking go though the entity
7409 	 * content to generate callbacks associated to the entity
7410 	 */
7411 	if (was_checked != 0) {
7412 	    void *user_data;
7413 	    /*
7414 	     * This is a bit hackish but this seems the best
7415 	     * way to make sure both SAX and DOM entity support
7416 	     * behaves okay.
7417 	     */
7418 	    if (ctxt->userData == ctxt)
7419 		user_data = NULL;
7420 	    else
7421 		user_data = ctxt->userData;
7422 
7423 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7424 		ctxt->depth++;
7425 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7426 				   ent->content, user_data, NULL);
7427 		ctxt->depth--;
7428 	    } else if (ent->etype ==
7429 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7430 		ctxt->depth++;
7431 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7432 			   ctxt->sax, user_data, ctxt->depth,
7433 			   ent->URI, ent->ExternalID, NULL);
7434 		ctxt->depth--;
7435 	    } else {
7436 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7437 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7438 			     "invalid entity type found\n", NULL);
7439 	    }
7440 	    if (ret == XML_ERR_ENTITY_LOOP) {
7441 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7442 		return;
7443 	    }
7444 	}
7445 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7446 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7447 	    /*
7448 	     * Entity reference callback comes second, it's somewhat
7449 	     * superfluous but a compatibility to historical behaviour
7450 	     */
7451 	    ctxt->sax->reference(ctxt->userData, ent->name);
7452 	}
7453 	return;
7454     }
7455 
7456     /*
7457      * If we didn't get any children for the entity being built
7458      */
7459     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7460 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7461 	/*
7462 	 * Create a node.
7463 	 */
7464 	ctxt->sax->reference(ctxt->userData, ent->name);
7465 	return;
7466     }
7467 
7468     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7469 	/*
7470 	 * There is a problem on the handling of _private for entities
7471 	 * (bug 155816): Should we copy the content of the field from
7472 	 * the entity (possibly overwriting some value set by the user
7473 	 * when a copy is created), should we leave it alone, or should
7474 	 * we try to take care of different situations?  The problem
7475 	 * is exacerbated by the usage of this field by the xmlReader.
7476 	 * To fix this bug, we look at _private on the created node
7477 	 * and, if it's NULL, we copy in whatever was in the entity.
7478 	 * If it's not NULL we leave it alone.  This is somewhat of a
7479 	 * hack - maybe we should have further tests to determine
7480 	 * what to do.
7481 	 */
7482 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7483 	    /*
7484 	     * Seems we are generating the DOM content, do
7485 	     * a simple tree copy for all references except the first
7486 	     * In the first occurrence list contains the replacement.
7487 	     */
7488 	    if (((list == NULL) && (ent->owner == 0)) ||
7489 		(ctxt->parseMode == XML_PARSE_READER)) {
7490 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7491 
7492 		/*
7493 		 * We are copying here, make sure there is no abuse
7494 		 */
7495 		ctxt->sizeentcopy += ent->length + 5;
7496 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497 		    return;
7498 
7499 		/*
7500 		 * when operating on a reader, the entities definitions
7501 		 * are always owning the entities subtree.
7502 		if (ctxt->parseMode == XML_PARSE_READER)
7503 		    ent->owner = 1;
7504 		 */
7505 
7506 		cur = ent->children;
7507 		while (cur != NULL) {
7508 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7509 		    if (nw != NULL) {
7510 			if (nw->_private == NULL)
7511 			    nw->_private = cur->_private;
7512 			if (firstChild == NULL){
7513 			    firstChild = nw;
7514 			}
7515 			nw = xmlAddChild(ctxt->node, nw);
7516 		    }
7517 		    if (cur == ent->last) {
7518 			/*
7519 			 * needed to detect some strange empty
7520 			 * node cases in the reader tests
7521 			 */
7522 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7523 			    (nw != NULL) &&
7524 			    (nw->type == XML_ELEMENT_NODE) &&
7525 			    (nw->children == NULL))
7526 			    nw->extra = 1;
7527 
7528 			break;
7529 		    }
7530 		    cur = cur->next;
7531 		}
7532 #ifdef LIBXML_LEGACY_ENABLED
7533 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7534 		  xmlAddEntityReference(ent, firstChild, nw);
7535 #endif /* LIBXML_LEGACY_ENABLED */
7536 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7537 		xmlNodePtr nw = NULL, cur, next, last,
7538 			   firstChild = NULL;
7539 
7540 		/*
7541 		 * We are copying here, make sure there is no abuse
7542 		 */
7543 		ctxt->sizeentcopy += ent->length + 5;
7544 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7545 		    return;
7546 
7547 		/*
7548 		 * Copy the entity child list and make it the new
7549 		 * entity child list. The goal is to make sure any
7550 		 * ID or REF referenced will be the one from the
7551 		 * document content and not the entity copy.
7552 		 */
7553 		cur = ent->children;
7554 		ent->children = NULL;
7555 		last = ent->last;
7556 		ent->last = NULL;
7557 		while (cur != NULL) {
7558 		    next = cur->next;
7559 		    cur->next = NULL;
7560 		    cur->parent = NULL;
7561 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7562 		    if (nw != NULL) {
7563 			if (nw->_private == NULL)
7564 			    nw->_private = cur->_private;
7565 			if (firstChild == NULL){
7566 			    firstChild = cur;
7567 			}
7568 			xmlAddChild((xmlNodePtr) ent, nw);
7569 			xmlAddChild(ctxt->node, cur);
7570 		    }
7571 		    if (cur == last)
7572 			break;
7573 		    cur = next;
7574 		}
7575 		if (ent->owner == 0)
7576 		    ent->owner = 1;
7577 #ifdef LIBXML_LEGACY_ENABLED
7578 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7579 		  xmlAddEntityReference(ent, firstChild, nw);
7580 #endif /* LIBXML_LEGACY_ENABLED */
7581 	    } else {
7582 		const xmlChar *nbktext;
7583 
7584 		/*
7585 		 * the name change is to avoid coalescing of the
7586 		 * node with a possible previous text one which
7587 		 * would make ent->children a dangling pointer
7588 		 */
7589 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7590 					-1);
7591 		if (ent->children->type == XML_TEXT_NODE)
7592 		    ent->children->name = nbktext;
7593 		if ((ent->last != ent->children) &&
7594 		    (ent->last->type == XML_TEXT_NODE))
7595 		    ent->last->name = nbktext;
7596 		xmlAddChildList(ctxt->node, ent->children);
7597 	    }
7598 
7599 	    /*
7600 	     * This is to avoid a nasty side effect, see
7601 	     * characters() in SAX.c
7602 	     */
7603 	    ctxt->nodemem = 0;
7604 	    ctxt->nodelen = 0;
7605 	    return;
7606 	}
7607     }
7608 }
7609 
7610 /**
7611  * xmlParseEntityRef:
7612  * @ctxt:  an XML parser context
7613  *
7614  * parse ENTITY references declarations
7615  *
7616  * [68] EntityRef ::= '&' Name ';'
7617  *
7618  * [ WFC: Entity Declared ]
7619  * In a document without any DTD, a document with only an internal DTD
7620  * subset which contains no parameter entity references, or a document
7621  * with "standalone='yes'", the Name given in the entity reference
7622  * must match that in an entity declaration, except that well-formed
7623  * documents need not declare any of the following entities: amp, lt,
7624  * gt, apos, quot.  The declaration of a parameter entity must precede
7625  * any reference to it.  Similarly, the declaration of a general entity
7626  * must precede any reference to it which appears in a default value in an
7627  * attribute-list declaration. Note that if entities are declared in the
7628  * external subset or in external parameter entities, a non-validating
7629  * processor is not obligated to read and process their declarations;
7630  * for such documents, the rule that an entity must be declared is a
7631  * well-formedness constraint only if standalone='yes'.
7632  *
7633  * [ WFC: Parsed Entity ]
7634  * An entity reference must not contain the name of an unparsed entity
7635  *
7636  * Returns the xmlEntityPtr if found, or NULL otherwise.
7637  */
7638 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7639 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7640     const xmlChar *name;
7641     xmlEntityPtr ent = NULL;
7642 
7643     GROW;
7644     if (ctxt->instate == XML_PARSER_EOF)
7645         return(NULL);
7646 
7647     if (RAW != '&')
7648         return(NULL);
7649     NEXT;
7650     name = xmlParseName(ctxt);
7651     if (name == NULL) {
7652 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7653 		       "xmlParseEntityRef: no name\n");
7654         return(NULL);
7655     }
7656     if (RAW != ';') {
7657 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7658 	return(NULL);
7659     }
7660     NEXT;
7661 
7662     /*
7663      * Predefined entities override any extra definition
7664      */
7665     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7666         ent = xmlGetPredefinedEntity(name);
7667         if (ent != NULL)
7668             return(ent);
7669     }
7670 
7671     /*
7672      * Increase the number of entity references parsed
7673      */
7674     ctxt->nbentities++;
7675 
7676     /*
7677      * Ask first SAX for entity resolution, otherwise try the
7678      * entities which may have stored in the parser context.
7679      */
7680     if (ctxt->sax != NULL) {
7681 	if (ctxt->sax->getEntity != NULL)
7682 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7683 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7684 	    (ctxt->options & XML_PARSE_OLDSAX))
7685 	    ent = xmlGetPredefinedEntity(name);
7686 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7687 	    (ctxt->userData==ctxt)) {
7688 	    ent = xmlSAX2GetEntity(ctxt, name);
7689 	}
7690     }
7691     if (ctxt->instate == XML_PARSER_EOF)
7692 	return(NULL);
7693     /*
7694      * [ WFC: Entity Declared ]
7695      * In a document without any DTD, a document with only an
7696      * internal DTD subset which contains no parameter entity
7697      * references, or a document with "standalone='yes'", the
7698      * Name given in the entity reference must match that in an
7699      * entity declaration, except that well-formed documents
7700      * need not declare any of the following entities: amp, lt,
7701      * gt, apos, quot.
7702      * The declaration of a parameter entity must precede any
7703      * reference to it.
7704      * Similarly, the declaration of a general entity must
7705      * precede any reference to it which appears in a default
7706      * value in an attribute-list declaration. Note that if
7707      * entities are declared in the external subset or in
7708      * external parameter entities, a non-validating processor
7709      * is not obligated to read and process their declarations;
7710      * for such documents, the rule that an entity must be
7711      * declared is a well-formedness constraint only if
7712      * standalone='yes'.
7713      */
7714     if (ent == NULL) {
7715 	if ((ctxt->standalone == 1) ||
7716 	    ((ctxt->hasExternalSubset == 0) &&
7717 	     (ctxt->hasPErefs == 0))) {
7718 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7719 		     "Entity '%s' not defined\n", name);
7720 	} else {
7721 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7722 		     "Entity '%s' not defined\n", name);
7723 	    if ((ctxt->inSubset == 0) &&
7724 		(ctxt->sax != NULL) &&
7725 		(ctxt->sax->reference != NULL)) {
7726 		ctxt->sax->reference(ctxt->userData, name);
7727 	    }
7728 	}
7729 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7730 	ctxt->valid = 0;
7731     }
7732 
7733     /*
7734      * [ WFC: Parsed Entity ]
7735      * An entity reference must not contain the name of an
7736      * unparsed entity
7737      */
7738     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7739 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7740 		 "Entity reference to unparsed entity %s\n", name);
7741     }
7742 
7743     /*
7744      * [ WFC: No External Entity References ]
7745      * Attribute values cannot contain direct or indirect
7746      * entity references to external entities.
7747      */
7748     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7749 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7750 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7751 	     "Attribute references external entity '%s'\n", name);
7752     }
7753     /*
7754      * [ WFC: No < in Attribute Values ]
7755      * The replacement text of any entity referred to directly or
7756      * indirectly in an attribute value (other than "&lt;") must
7757      * not contain a <.
7758      */
7759     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7760 	     (ent != NULL) &&
7761 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7762 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7763 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7764 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7765 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7766         }
7767     }
7768 
7769     /*
7770      * Internal check, no parameter entities here ...
7771      */
7772     else {
7773 	switch (ent->etype) {
7774 	    case XML_INTERNAL_PARAMETER_ENTITY:
7775 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7776 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7777 	     "Attempt to reference the parameter entity '%s'\n",
7778 			      name);
7779 	    break;
7780 	    default:
7781 	    break;
7782 	}
7783     }
7784 
7785     /*
7786      * [ WFC: No Recursion ]
7787      * A parsed entity must not contain a recursive reference
7788      * to itself, either directly or indirectly.
7789      * Done somewhere else
7790      */
7791     return(ent);
7792 }
7793 
7794 /**
7795  * xmlParseStringEntityRef:
7796  * @ctxt:  an XML parser context
7797  * @str:  a pointer to an index in the string
7798  *
7799  * parse ENTITY references declarations, but this version parses it from
7800  * a string value.
7801  *
7802  * [68] EntityRef ::= '&' Name ';'
7803  *
7804  * [ WFC: Entity Declared ]
7805  * In a document without any DTD, a document with only an internal DTD
7806  * subset which contains no parameter entity references, or a document
7807  * with "standalone='yes'", the Name given in the entity reference
7808  * must match that in an entity declaration, except that well-formed
7809  * documents need not declare any of the following entities: amp, lt,
7810  * gt, apos, quot.  The declaration of a parameter entity must precede
7811  * any reference to it.  Similarly, the declaration of a general entity
7812  * must precede any reference to it which appears in a default value in an
7813  * attribute-list declaration. Note that if entities are declared in the
7814  * external subset or in external parameter entities, a non-validating
7815  * processor is not obligated to read and process their declarations;
7816  * for such documents, the rule that an entity must be declared is a
7817  * well-formedness constraint only if standalone='yes'.
7818  *
7819  * [ WFC: Parsed Entity ]
7820  * An entity reference must not contain the name of an unparsed entity
7821  *
7822  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7823  * is updated to the current location in the string.
7824  */
7825 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7826 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7827     xmlChar *name;
7828     const xmlChar *ptr;
7829     xmlChar cur;
7830     xmlEntityPtr ent = NULL;
7831 
7832     if ((str == NULL) || (*str == NULL))
7833         return(NULL);
7834     ptr = *str;
7835     cur = *ptr;
7836     if (cur != '&')
7837 	return(NULL);
7838 
7839     ptr++;
7840     name = xmlParseStringName(ctxt, &ptr);
7841     if (name == NULL) {
7842 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7843 		       "xmlParseStringEntityRef: no name\n");
7844 	*str = ptr;
7845 	return(NULL);
7846     }
7847     if (*ptr != ';') {
7848 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7849         xmlFree(name);
7850 	*str = ptr;
7851 	return(NULL);
7852     }
7853     ptr++;
7854 
7855 
7856     /*
7857      * Predefined entities override any extra definition
7858      */
7859     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7860         ent = xmlGetPredefinedEntity(name);
7861         if (ent != NULL) {
7862             xmlFree(name);
7863             *str = ptr;
7864             return(ent);
7865         }
7866     }
7867 
7868     /*
7869      * Increate the number of entity references parsed
7870      */
7871     ctxt->nbentities++;
7872 
7873     /*
7874      * Ask first SAX for entity resolution, otherwise try the
7875      * entities which may have stored in the parser context.
7876      */
7877     if (ctxt->sax != NULL) {
7878 	if (ctxt->sax->getEntity != NULL)
7879 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7880 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7881 	    ent = xmlGetPredefinedEntity(name);
7882 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7883 	    ent = xmlSAX2GetEntity(ctxt, name);
7884 	}
7885     }
7886     if (ctxt->instate == XML_PARSER_EOF) {
7887 	xmlFree(name);
7888 	return(NULL);
7889     }
7890 
7891     /*
7892      * [ WFC: Entity Declared ]
7893      * In a document without any DTD, a document with only an
7894      * internal DTD subset which contains no parameter entity
7895      * references, or a document with "standalone='yes'", the
7896      * Name given in the entity reference must match that in an
7897      * entity declaration, except that well-formed documents
7898      * need not declare any of the following entities: amp, lt,
7899      * gt, apos, quot.
7900      * The declaration of a parameter entity must precede any
7901      * reference to it.
7902      * Similarly, the declaration of a general entity must
7903      * precede any reference to it which appears in a default
7904      * value in an attribute-list declaration. Note that if
7905      * entities are declared in the external subset or in
7906      * external parameter entities, a non-validating processor
7907      * is not obligated to read and process their declarations;
7908      * for such documents, the rule that an entity must be
7909      * declared is a well-formedness constraint only if
7910      * standalone='yes'.
7911      */
7912     if (ent == NULL) {
7913 	if ((ctxt->standalone == 1) ||
7914 	    ((ctxt->hasExternalSubset == 0) &&
7915 	     (ctxt->hasPErefs == 0))) {
7916 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7917 		     "Entity '%s' not defined\n", name);
7918 	} else {
7919 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920 			  "Entity '%s' not defined\n",
7921 			  name);
7922 	}
7923 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7924 	/* TODO ? check regressions ctxt->valid = 0; */
7925     }
7926 
7927     /*
7928      * [ WFC: Parsed Entity ]
7929      * An entity reference must not contain the name of an
7930      * unparsed entity
7931      */
7932     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7933 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7934 		 "Entity reference to unparsed entity %s\n", name);
7935     }
7936 
7937     /*
7938      * [ WFC: No External Entity References ]
7939      * Attribute values cannot contain direct or indirect
7940      * entity references to external entities.
7941      */
7942     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7943 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7944 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7945 	 "Attribute references external entity '%s'\n", name);
7946     }
7947     /*
7948      * [ WFC: No < in Attribute Values ]
7949      * The replacement text of any entity referred to directly or
7950      * indirectly in an attribute value (other than "&lt;") must
7951      * not contain a <.
7952      */
7953     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7954 	     (ent != NULL) && (ent->content != NULL) &&
7955 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7956 	     (xmlStrchr(ent->content, '<'))) {
7957 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7958      "'<' in entity '%s' is not allowed in attributes values\n",
7959 			  name);
7960     }
7961 
7962     /*
7963      * Internal check, no parameter entities here ...
7964      */
7965     else {
7966 	switch (ent->etype) {
7967 	    case XML_INTERNAL_PARAMETER_ENTITY:
7968 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7969 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7970 	     "Attempt to reference the parameter entity '%s'\n",
7971 				  name);
7972 	    break;
7973 	    default:
7974 	    break;
7975 	}
7976     }
7977 
7978     /*
7979      * [ WFC: No Recursion ]
7980      * A parsed entity must not contain a recursive reference
7981      * to itself, either directly or indirectly.
7982      * Done somewhere else
7983      */
7984 
7985     xmlFree(name);
7986     *str = ptr;
7987     return(ent);
7988 }
7989 
7990 /**
7991  * xmlParsePEReference:
7992  * @ctxt:  an XML parser context
7993  *
7994  * parse PEReference declarations
7995  * The entity content is handled directly by pushing it's content as
7996  * a new input stream.
7997  *
7998  * [69] PEReference ::= '%' Name ';'
7999  *
8000  * [ WFC: No Recursion ]
8001  * A parsed entity must not contain a recursive
8002  * reference to itself, either directly or indirectly.
8003  *
8004  * [ WFC: Entity Declared ]
8005  * In a document without any DTD, a document with only an internal DTD
8006  * subset which contains no parameter entity references, or a document
8007  * with "standalone='yes'", ...  ... The declaration of a parameter
8008  * entity must precede any reference to it...
8009  *
8010  * [ VC: Entity Declared ]
8011  * In a document with an external subset or external parameter entities
8012  * with "standalone='no'", ...  ... The declaration of a parameter entity
8013  * must precede any reference to it...
8014  *
8015  * [ WFC: In DTD ]
8016  * Parameter-entity references may only appear in the DTD.
8017  * NOTE: misleading but this is handled.
8018  */
8019 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)8020 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8021 {
8022     const xmlChar *name;
8023     xmlEntityPtr entity = NULL;
8024     xmlParserInputPtr input;
8025 
8026     if (RAW != '%')
8027         return;
8028     NEXT;
8029     name = xmlParseName(ctxt);
8030     if (name == NULL) {
8031 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8032 		       "xmlParsePEReference: no name\n");
8033 	return;
8034     }
8035     if (RAW != ';') {
8036 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8037         return;
8038     }
8039 
8040     NEXT;
8041 
8042     /*
8043      * Increate the number of entity references parsed
8044      */
8045     ctxt->nbentities++;
8046 
8047     /*
8048      * Request the entity from SAX
8049      */
8050     if ((ctxt->sax != NULL) &&
8051 	(ctxt->sax->getParameterEntity != NULL))
8052 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8053     if (ctxt->instate == XML_PARSER_EOF)
8054 	return;
8055     if (entity == NULL) {
8056 	/*
8057 	 * [ WFC: Entity Declared ]
8058 	 * In a document without any DTD, a document with only an
8059 	 * internal DTD subset which contains no parameter entity
8060 	 * references, or a document with "standalone='yes'", ...
8061 	 * ... The declaration of a parameter entity must precede
8062 	 * any reference to it...
8063 	 */
8064 	if ((ctxt->standalone == 1) ||
8065 	    ((ctxt->hasExternalSubset == 0) &&
8066 	     (ctxt->hasPErefs == 0))) {
8067 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8068 			      "PEReference: %%%s; not found\n",
8069 			      name);
8070 	} else {
8071 	    /*
8072 	     * [ VC: Entity Declared ]
8073 	     * In a document with an external subset or external
8074 	     * parameter entities with "standalone='no'", ...
8075 	     * ... The declaration of a parameter entity must
8076 	     * precede any reference to it...
8077 	     */
8078 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8079 			  "PEReference: %%%s; not found\n",
8080 			  name, NULL);
8081 	    ctxt->valid = 0;
8082 	}
8083 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8084     } else {
8085 	/*
8086 	 * Internal checking in case the entity quest barfed
8087 	 */
8088 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8089 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8090 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8091 		  "Internal: %%%s; is not a parameter entity\n",
8092 			  name, NULL);
8093 	} else if (ctxt->input->free != deallocblankswrapper) {
8094 	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8095 	    if (xmlPushInput(ctxt, input) < 0)
8096 		return;
8097 	} else {
8098 	    /*
8099 	     * TODO !!!
8100 	     * handle the extra spaces added before and after
8101 	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
8102 	     */
8103 	    input = xmlNewEntityInputStream(ctxt, entity);
8104 	    if (xmlPushInput(ctxt, input) < 0)
8105 		return;
8106 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8107 		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8108 		(IS_BLANK_CH(NXT(5)))) {
8109 		xmlParseTextDecl(ctxt);
8110 		if (ctxt->errNo ==
8111 		    XML_ERR_UNSUPPORTED_ENCODING) {
8112 		    /*
8113 		     * The XML REC instructs us to stop parsing
8114 		     * right here
8115 		     */
8116 		    xmlHaltParser(ctxt);
8117 		    return;
8118 		}
8119 	    }
8120 	}
8121     }
8122     ctxt->hasPErefs = 1;
8123 }
8124 
8125 /**
8126  * xmlLoadEntityContent:
8127  * @ctxt:  an XML parser context
8128  * @entity: an unloaded system entity
8129  *
8130  * Load the original content of the given system entity from the
8131  * ExternalID/SystemID given. This is to be used for Included in Literal
8132  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8133  *
8134  * Returns 0 in case of success and -1 in case of failure
8135  */
8136 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8137 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8138     xmlParserInputPtr input;
8139     xmlBufferPtr buf;
8140     int l, c;
8141     int count = 0;
8142 
8143     if ((ctxt == NULL) || (entity == NULL) ||
8144         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8145 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8146 	(entity->content != NULL)) {
8147 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8148 	            "xmlLoadEntityContent parameter error");
8149         return(-1);
8150     }
8151 
8152     if (xmlParserDebugEntities)
8153 	xmlGenericError(xmlGenericErrorContext,
8154 		"Reading %s entity content input\n", entity->name);
8155 
8156     buf = xmlBufferCreate();
8157     if (buf == NULL) {
8158 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8159 	            "xmlLoadEntityContent parameter error");
8160         return(-1);
8161     }
8162 
8163     input = xmlNewEntityInputStream(ctxt, entity);
8164     if (input == NULL) {
8165 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8166 	            "xmlLoadEntityContent input error");
8167 	xmlBufferFree(buf);
8168         return(-1);
8169     }
8170 
8171     /*
8172      * Push the entity as the current input, read char by char
8173      * saving to the buffer until the end of the entity or an error
8174      */
8175     if (xmlPushInput(ctxt, input) < 0) {
8176         xmlBufferFree(buf);
8177 	return(-1);
8178     }
8179 
8180     GROW;
8181     c = CUR_CHAR(l);
8182     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8183            (IS_CHAR(c))) {
8184         xmlBufferAdd(buf, ctxt->input->cur, l);
8185 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8186 	    count = 0;
8187 	    GROW;
8188             if (ctxt->instate == XML_PARSER_EOF) {
8189                 xmlBufferFree(buf);
8190                 return(-1);
8191             }
8192 	}
8193 	NEXTL(l);
8194 	c = CUR_CHAR(l);
8195 	if (c == 0) {
8196 	    count = 0;
8197 	    GROW;
8198             if (ctxt->instate == XML_PARSER_EOF) {
8199                 xmlBufferFree(buf);
8200                 return(-1);
8201             }
8202 	    c = CUR_CHAR(l);
8203 	}
8204     }
8205 
8206     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8207         xmlPopInput(ctxt);
8208     } else if (!IS_CHAR(c)) {
8209         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8210                           "xmlLoadEntityContent: invalid char value %d\n",
8211 	                  c);
8212 	xmlBufferFree(buf);
8213 	return(-1);
8214     }
8215     entity->content = buf->content;
8216     buf->content = NULL;
8217     xmlBufferFree(buf);
8218 
8219     return(0);
8220 }
8221 
8222 /**
8223  * xmlParseStringPEReference:
8224  * @ctxt:  an XML parser context
8225  * @str:  a pointer to an index in the string
8226  *
8227  * parse PEReference declarations
8228  *
8229  * [69] PEReference ::= '%' Name ';'
8230  *
8231  * [ WFC: No Recursion ]
8232  * A parsed entity must not contain a recursive
8233  * reference to itself, either directly or indirectly.
8234  *
8235  * [ WFC: Entity Declared ]
8236  * In a document without any DTD, a document with only an internal DTD
8237  * subset which contains no parameter entity references, or a document
8238  * with "standalone='yes'", ...  ... The declaration of a parameter
8239  * entity must precede any reference to it...
8240  *
8241  * [ VC: Entity Declared ]
8242  * In a document with an external subset or external parameter entities
8243  * with "standalone='no'", ...  ... The declaration of a parameter entity
8244  * must precede any reference to it...
8245  *
8246  * [ WFC: In DTD ]
8247  * Parameter-entity references may only appear in the DTD.
8248  * NOTE: misleading but this is handled.
8249  *
8250  * Returns the string of the entity content.
8251  *         str is updated to the current value of the index
8252  */
8253 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8254 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8255     const xmlChar *ptr;
8256     xmlChar cur;
8257     xmlChar *name;
8258     xmlEntityPtr entity = NULL;
8259 
8260     if ((str == NULL) || (*str == NULL)) return(NULL);
8261     ptr = *str;
8262     cur = *ptr;
8263     if (cur != '%')
8264         return(NULL);
8265     ptr++;
8266     name = xmlParseStringName(ctxt, &ptr);
8267     if (name == NULL) {
8268 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8269 		       "xmlParseStringPEReference: no name\n");
8270 	*str = ptr;
8271 	return(NULL);
8272     }
8273     cur = *ptr;
8274     if (cur != ';') {
8275 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8276 	xmlFree(name);
8277 	*str = ptr;
8278 	return(NULL);
8279     }
8280     ptr++;
8281 
8282     /*
8283      * Increate the number of entity references parsed
8284      */
8285     ctxt->nbentities++;
8286 
8287     /*
8288      * Request the entity from SAX
8289      */
8290     if ((ctxt->sax != NULL) &&
8291 	(ctxt->sax->getParameterEntity != NULL))
8292 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8293     if (ctxt->instate == XML_PARSER_EOF) {
8294 	xmlFree(name);
8295 	return(NULL);
8296     }
8297     if (entity == NULL) {
8298 	/*
8299 	 * [ WFC: Entity Declared ]
8300 	 * In a document without any DTD, a document with only an
8301 	 * internal DTD subset which contains no parameter entity
8302 	 * references, or a document with "standalone='yes'", ...
8303 	 * ... The declaration of a parameter entity must precede
8304 	 * any reference to it...
8305 	 */
8306 	if ((ctxt->standalone == 1) ||
8307 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8308 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8309 		 "PEReference: %%%s; not found\n", name);
8310 	} else {
8311 	    /*
8312 	     * [ VC: Entity Declared ]
8313 	     * In a document with an external subset or external
8314 	     * parameter entities with "standalone='no'", ...
8315 	     * ... The declaration of a parameter entity must
8316 	     * precede any reference to it...
8317 	     */
8318 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8319 			  "PEReference: %%%s; not found\n",
8320 			  name, NULL);
8321 	    ctxt->valid = 0;
8322 	}
8323 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8324     } else {
8325 	/*
8326 	 * Internal checking in case the entity quest barfed
8327 	 */
8328 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8329 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8330 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8331 			  "%%%s; is not a parameter entity\n",
8332 			  name, NULL);
8333 	}
8334     }
8335     ctxt->hasPErefs = 1;
8336     xmlFree(name);
8337     *str = ptr;
8338     return(entity);
8339 }
8340 
8341 /**
8342  * xmlParseDocTypeDecl:
8343  * @ctxt:  an XML parser context
8344  *
8345  * parse a DOCTYPE declaration
8346  *
8347  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8348  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8349  *
8350  * [ VC: Root Element Type ]
8351  * The Name in the document type declaration must match the element
8352  * type of the root element.
8353  */
8354 
8355 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8356 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8357     const xmlChar *name = NULL;
8358     xmlChar *ExternalID = NULL;
8359     xmlChar *URI = NULL;
8360 
8361     /*
8362      * We know that '<!DOCTYPE' has been detected.
8363      */
8364     SKIP(9);
8365 
8366     SKIP_BLANKS;
8367 
8368     /*
8369      * Parse the DOCTYPE name.
8370      */
8371     name = xmlParseName(ctxt);
8372     if (name == NULL) {
8373 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8374 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8375     }
8376     ctxt->intSubName = name;
8377 
8378     SKIP_BLANKS;
8379 
8380     /*
8381      * Check for SystemID and ExternalID
8382      */
8383     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8384 
8385     if ((URI != NULL) || (ExternalID != NULL)) {
8386         ctxt->hasExternalSubset = 1;
8387     }
8388     ctxt->extSubURI = URI;
8389     ctxt->extSubSystem = ExternalID;
8390 
8391     SKIP_BLANKS;
8392 
8393     /*
8394      * Create and update the internal subset.
8395      */
8396     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8397 	(!ctxt->disableSAX))
8398 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8399     if (ctxt->instate == XML_PARSER_EOF)
8400 	return;
8401 
8402     /*
8403      * Is there any internal subset declarations ?
8404      * they are handled separately in xmlParseInternalSubset()
8405      */
8406     if (RAW == '[')
8407 	return;
8408 
8409     /*
8410      * We should be at the end of the DOCTYPE declaration.
8411      */
8412     if (RAW != '>') {
8413 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8414     }
8415     NEXT;
8416 }
8417 
8418 /**
8419  * xmlParseInternalSubset:
8420  * @ctxt:  an XML parser context
8421  *
8422  * parse the internal subset declaration
8423  *
8424  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8425  */
8426 
8427 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8428 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8429     /*
8430      * Is there any DTD definition ?
8431      */
8432     if (RAW == '[') {
8433         ctxt->instate = XML_PARSER_DTD;
8434         NEXT;
8435 	/*
8436 	 * Parse the succession of Markup declarations and
8437 	 * PEReferences.
8438 	 * Subsequence (markupdecl | PEReference | S)*
8439 	 */
8440 	while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8441 	    const xmlChar *check = CUR_PTR;
8442 	    unsigned int cons = ctxt->input->consumed;
8443 
8444 	    SKIP_BLANKS;
8445 	    xmlParseMarkupDecl(ctxt);
8446 	    xmlParsePEReference(ctxt);
8447 
8448 	    /*
8449 	     * Pop-up of finished entities.
8450 	     */
8451 	    while ((RAW == 0) && (ctxt->inputNr > 1))
8452 		xmlPopInput(ctxt);
8453 
8454 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8455 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8456 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8457 		break;
8458 	    }
8459 	}
8460 	if (RAW == ']') {
8461 	    NEXT;
8462 	    SKIP_BLANKS;
8463 	}
8464     }
8465 
8466     /*
8467      * We should be at the end of the DOCTYPE declaration.
8468      */
8469     if (RAW != '>') {
8470 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471     }
8472     NEXT;
8473 }
8474 
8475 #ifdef LIBXML_SAX1_ENABLED
8476 /**
8477  * xmlParseAttribute:
8478  * @ctxt:  an XML parser context
8479  * @value:  a xmlChar ** used to store the value of the attribute
8480  *
8481  * parse an attribute
8482  *
8483  * [41] Attribute ::= Name Eq AttValue
8484  *
8485  * [ WFC: No External Entity References ]
8486  * Attribute values cannot contain direct or indirect entity references
8487  * to external entities.
8488  *
8489  * [ WFC: No < in Attribute Values ]
8490  * The replacement text of any entity referred to directly or indirectly in
8491  * an attribute value (other than "&lt;") must not contain a <.
8492  *
8493  * [ VC: Attribute Value Type ]
8494  * The attribute must have been declared; the value must be of the type
8495  * declared for it.
8496  *
8497  * [25] Eq ::= S? '=' S?
8498  *
8499  * With namespace:
8500  *
8501  * [NS 11] Attribute ::= QName Eq AttValue
8502  *
8503  * Also the case QName == xmlns:??? is handled independently as a namespace
8504  * definition.
8505  *
8506  * Returns the attribute name, and the value in *value.
8507  */
8508 
8509 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8510 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8511     const xmlChar *name;
8512     xmlChar *val;
8513 
8514     *value = NULL;
8515     GROW;
8516     name = xmlParseName(ctxt);
8517     if (name == NULL) {
8518 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8519 	               "error parsing attribute name\n");
8520         return(NULL);
8521     }
8522 
8523     /*
8524      * read the value
8525      */
8526     SKIP_BLANKS;
8527     if (RAW == '=') {
8528         NEXT;
8529 	SKIP_BLANKS;
8530 	val = xmlParseAttValue(ctxt);
8531 	ctxt->instate = XML_PARSER_CONTENT;
8532     } else {
8533 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8534 	       "Specification mandate value for attribute %s\n", name);
8535 	return(NULL);
8536     }
8537 
8538     /*
8539      * Check that xml:lang conforms to the specification
8540      * No more registered as an error, just generate a warning now
8541      * since this was deprecated in XML second edition
8542      */
8543     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8544 	if (!xmlCheckLanguageID(val)) {
8545 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8546 		          "Malformed value for xml:lang : %s\n",
8547 			  val, NULL);
8548 	}
8549     }
8550 
8551     /*
8552      * Check that xml:space conforms to the specification
8553      */
8554     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8555 	if (xmlStrEqual(val, BAD_CAST "default"))
8556 	    *(ctxt->space) = 0;
8557 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8558 	    *(ctxt->space) = 1;
8559 	else {
8560 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8561 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8562                                  val, NULL);
8563 	}
8564     }
8565 
8566     *value = val;
8567     return(name);
8568 }
8569 
8570 /**
8571  * xmlParseStartTag:
8572  * @ctxt:  an XML parser context
8573  *
8574  * parse a start of tag either for rule element or
8575  * EmptyElement. In both case we don't parse the tag closing chars.
8576  *
8577  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8578  *
8579  * [ WFC: Unique Att Spec ]
8580  * No attribute name may appear more than once in the same start-tag or
8581  * empty-element tag.
8582  *
8583  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8584  *
8585  * [ WFC: Unique Att Spec ]
8586  * No attribute name may appear more than once in the same start-tag or
8587  * empty-element tag.
8588  *
8589  * With namespace:
8590  *
8591  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8592  *
8593  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8594  *
8595  * Returns the element name parsed
8596  */
8597 
8598 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8599 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8600     const xmlChar *name;
8601     const xmlChar *attname;
8602     xmlChar *attvalue;
8603     const xmlChar **atts = ctxt->atts;
8604     int nbatts = 0;
8605     int maxatts = ctxt->maxatts;
8606     int i;
8607 
8608     if (RAW != '<') return(NULL);
8609     NEXT1;
8610 
8611     name = xmlParseName(ctxt);
8612     if (name == NULL) {
8613 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8614 	     "xmlParseStartTag: invalid element name\n");
8615         return(NULL);
8616     }
8617 
8618     /*
8619      * Now parse the attributes, it ends up with the ending
8620      *
8621      * (S Attribute)* S?
8622      */
8623     SKIP_BLANKS;
8624     GROW;
8625 
8626     while (((RAW != '>') &&
8627 	   ((RAW != '/') || (NXT(1) != '>')) &&
8628 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8629 	const xmlChar *q = CUR_PTR;
8630 	unsigned int cons = ctxt->input->consumed;
8631 
8632 	attname = xmlParseAttribute(ctxt, &attvalue);
8633         if ((attname != NULL) && (attvalue != NULL)) {
8634 	    /*
8635 	     * [ WFC: Unique Att Spec ]
8636 	     * No attribute name may appear more than once in the same
8637 	     * start-tag or empty-element tag.
8638 	     */
8639 	    for (i = 0; i < nbatts;i += 2) {
8640 	        if (xmlStrEqual(atts[i], attname)) {
8641 		    xmlErrAttributeDup(ctxt, NULL, attname);
8642 		    xmlFree(attvalue);
8643 		    goto failed;
8644 		}
8645 	    }
8646 	    /*
8647 	     * Add the pair to atts
8648 	     */
8649 	    if (atts == NULL) {
8650 	        maxatts = 22; /* allow for 10 attrs by default */
8651 	        atts = (const xmlChar **)
8652 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8653 		if (atts == NULL) {
8654 		    xmlErrMemory(ctxt, NULL);
8655 		    if (attvalue != NULL)
8656 			xmlFree(attvalue);
8657 		    goto failed;
8658 		}
8659 		ctxt->atts = atts;
8660 		ctxt->maxatts = maxatts;
8661 	    } else if (nbatts + 4 > maxatts) {
8662 	        const xmlChar **n;
8663 
8664 	        maxatts *= 2;
8665 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8666 					     maxatts * sizeof(const xmlChar *));
8667 		if (n == NULL) {
8668 		    xmlErrMemory(ctxt, NULL);
8669 		    if (attvalue != NULL)
8670 			xmlFree(attvalue);
8671 		    goto failed;
8672 		}
8673 		atts = n;
8674 		ctxt->atts = atts;
8675 		ctxt->maxatts = maxatts;
8676 	    }
8677 	    atts[nbatts++] = attname;
8678 	    atts[nbatts++] = attvalue;
8679 	    atts[nbatts] = NULL;
8680 	    atts[nbatts + 1] = NULL;
8681 	} else {
8682 	    if (attvalue != NULL)
8683 		xmlFree(attvalue);
8684 	}
8685 
8686 failed:
8687 
8688 	GROW
8689 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8690 	    break;
8691 	if (!IS_BLANK_CH(RAW)) {
8692 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8693 			   "attributes construct error\n");
8694 	}
8695 	SKIP_BLANKS;
8696         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8697             (attname == NULL) && (attvalue == NULL)) {
8698 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8699 			   "xmlParseStartTag: problem parsing attributes\n");
8700 	    break;
8701 	}
8702 	SHRINK;
8703         GROW;
8704     }
8705 
8706     /*
8707      * SAX: Start of Element !
8708      */
8709     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8710 	(!ctxt->disableSAX)) {
8711 	if (nbatts > 0)
8712 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8713 	else
8714 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8715     }
8716 
8717     if (atts != NULL) {
8718         /* Free only the content strings */
8719         for (i = 1;i < nbatts;i+=2)
8720 	    if (atts[i] != NULL)
8721 	       xmlFree((xmlChar *) atts[i]);
8722     }
8723     return(name);
8724 }
8725 
8726 /**
8727  * xmlParseEndTag1:
8728  * @ctxt:  an XML parser context
8729  * @line:  line of the start tag
8730  * @nsNr:  number of namespaces on the start tag
8731  *
8732  * parse an end of tag
8733  *
8734  * [42] ETag ::= '</' Name S? '>'
8735  *
8736  * With namespace
8737  *
8738  * [NS 9] ETag ::= '</' QName S? '>'
8739  */
8740 
8741 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8742 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8743     const xmlChar *name;
8744 
8745     GROW;
8746     if ((RAW != '<') || (NXT(1) != '/')) {
8747 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8748 		       "xmlParseEndTag: '</' not found\n");
8749 	return;
8750     }
8751     SKIP(2);
8752 
8753     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8754 
8755     /*
8756      * We should definitely be at the ending "S? '>'" part
8757      */
8758     GROW;
8759     SKIP_BLANKS;
8760     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8761 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8762     } else
8763 	NEXT1;
8764 
8765     /*
8766      * [ WFC: Element Type Match ]
8767      * The Name in an element's end-tag must match the element type in the
8768      * start-tag.
8769      *
8770      */
8771     if (name != (xmlChar*)1) {
8772         if (name == NULL) name = BAD_CAST "unparseable";
8773         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8774 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8775 		                ctxt->name, line, name);
8776     }
8777 
8778     /*
8779      * SAX: End of Tag
8780      */
8781     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8782 	(!ctxt->disableSAX))
8783         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8784 
8785     namePop(ctxt);
8786     spacePop(ctxt);
8787     return;
8788 }
8789 
8790 /**
8791  * xmlParseEndTag:
8792  * @ctxt:  an XML parser context
8793  *
8794  * parse an end of tag
8795  *
8796  * [42] ETag ::= '</' Name S? '>'
8797  *
8798  * With namespace
8799  *
8800  * [NS 9] ETag ::= '</' QName S? '>'
8801  */
8802 
8803 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8804 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8805     xmlParseEndTag1(ctxt, 0);
8806 }
8807 #endif /* LIBXML_SAX1_ENABLED */
8808 
8809 /************************************************************************
8810  *									*
8811  *		      SAX 2 specific operations				*
8812  *									*
8813  ************************************************************************/
8814 
8815 /*
8816  * xmlGetNamespace:
8817  * @ctxt:  an XML parser context
8818  * @prefix:  the prefix to lookup
8819  *
8820  * Lookup the namespace name for the @prefix (which ca be NULL)
8821  * The prefix must come from the @ctxt->dict dictionnary
8822  *
8823  * Returns the namespace name or NULL if not bound
8824  */
8825 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8826 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8827     int i;
8828 
8829     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8830     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8831         if (ctxt->nsTab[i] == prefix) {
8832 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8833 	        return(NULL);
8834 	    return(ctxt->nsTab[i + 1]);
8835 	}
8836     return(NULL);
8837 }
8838 
8839 /**
8840  * xmlParseQName:
8841  * @ctxt:  an XML parser context
8842  * @prefix:  pointer to store the prefix part
8843  *
8844  * parse an XML Namespace QName
8845  *
8846  * [6]  QName  ::= (Prefix ':')? LocalPart
8847  * [7]  Prefix  ::= NCName
8848  * [8]  LocalPart  ::= NCName
8849  *
8850  * Returns the Name parsed or NULL
8851  */
8852 
8853 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8854 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8855     const xmlChar *l, *p;
8856 
8857     GROW;
8858 
8859     l = xmlParseNCName(ctxt);
8860     if (l == NULL) {
8861         if (CUR == ':') {
8862 	    l = xmlParseName(ctxt);
8863 	    if (l != NULL) {
8864 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8865 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8866 		*prefix = NULL;
8867 		return(l);
8868 	    }
8869 	}
8870         return(NULL);
8871     }
8872     if (CUR == ':') {
8873         NEXT;
8874 	p = l;
8875 	l = xmlParseNCName(ctxt);
8876 	if (l == NULL) {
8877 	    xmlChar *tmp;
8878 
8879             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8880 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8881 	    l = xmlParseNmtoken(ctxt);
8882 	    if (l == NULL)
8883 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8884 	    else {
8885 		tmp = xmlBuildQName(l, p, NULL, 0);
8886 		xmlFree((char *)l);
8887 	    }
8888 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8889 	    if (tmp != NULL) xmlFree(tmp);
8890 	    *prefix = NULL;
8891 	    return(p);
8892 	}
8893 	if (CUR == ':') {
8894 	    xmlChar *tmp;
8895 
8896             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8897 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8898 	    NEXT;
8899 	    tmp = (xmlChar *) xmlParseName(ctxt);
8900 	    if (tmp != NULL) {
8901 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8902 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8903 		if (tmp != NULL) xmlFree(tmp);
8904 		*prefix = p;
8905 		return(l);
8906 	    }
8907 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8908 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8909 	    if (tmp != NULL) xmlFree(tmp);
8910 	    *prefix = p;
8911 	    return(l);
8912 	}
8913 	*prefix = p;
8914     } else
8915         *prefix = NULL;
8916     return(l);
8917 }
8918 
8919 /**
8920  * xmlParseQNameAndCompare:
8921  * @ctxt:  an XML parser context
8922  * @name:  the localname
8923  * @prefix:  the prefix, if any.
8924  *
8925  * parse an XML name and compares for match
8926  * (specialized for endtag parsing)
8927  *
8928  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8929  * and the name for mismatch
8930  */
8931 
8932 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8933 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8934                         xmlChar const *prefix) {
8935     const xmlChar *cmp;
8936     const xmlChar *in;
8937     const xmlChar *ret;
8938     const xmlChar *prefix2;
8939 
8940     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8941 
8942     GROW;
8943     in = ctxt->input->cur;
8944 
8945     cmp = prefix;
8946     while (*in != 0 && *in == *cmp) {
8947 	++in;
8948 	++cmp;
8949     }
8950     if ((*cmp == 0) && (*in == ':')) {
8951         in++;
8952 	cmp = name;
8953 	while (*in != 0 && *in == *cmp) {
8954 	    ++in;
8955 	    ++cmp;
8956 	}
8957 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8958 	    /* success */
8959 	    ctxt->input->cur = in;
8960 	    return((const xmlChar*) 1);
8961 	}
8962     }
8963     /*
8964      * all strings coms from the dictionary, equality can be done directly
8965      */
8966     ret = xmlParseQName (ctxt, &prefix2);
8967     if ((ret == name) && (prefix == prefix2))
8968 	return((const xmlChar*) 1);
8969     return ret;
8970 }
8971 
8972 /**
8973  * xmlParseAttValueInternal:
8974  * @ctxt:  an XML parser context
8975  * @len:  attribute len result
8976  * @alloc:  whether the attribute was reallocated as a new string
8977  * @normalize:  if 1 then further non-CDATA normalization must be done
8978  *
8979  * parse a value for an attribute.
8980  * NOTE: if no normalization is needed, the routine will return pointers
8981  *       directly from the data buffer.
8982  *
8983  * 3.3.3 Attribute-Value Normalization:
8984  * Before the value of an attribute is passed to the application or
8985  * checked for validity, the XML processor must normalize it as follows:
8986  * - a character reference is processed by appending the referenced
8987  *   character to the attribute value
8988  * - an entity reference is processed by recursively processing the
8989  *   replacement text of the entity
8990  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8991  *   appending #x20 to the normalized value, except that only a single
8992  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8993  *   parsed entity or the literal entity value of an internal parsed entity
8994  * - other characters are processed by appending them to the normalized value
8995  * If the declared value is not CDATA, then the XML processor must further
8996  * process the normalized attribute value by discarding any leading and
8997  * trailing space (#x20) characters, and by replacing sequences of space
8998  * (#x20) characters by a single space (#x20) character.
8999  * All attributes for which no declaration has been read should be treated
9000  * by a non-validating parser as if declared CDATA.
9001  *
9002  * Returns the AttValue parsed or NULL. The value has to be freed by the
9003  *     caller if it was copied, this can be detected by val[*len] == 0.
9004  */
9005 
9006 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)9007 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9008                          int normalize)
9009 {
9010     xmlChar limit = 0;
9011     const xmlChar *in = NULL, *start, *end, *last;
9012     xmlChar *ret = NULL;
9013     int line, col;
9014 
9015     GROW;
9016     in = (xmlChar *) CUR_PTR;
9017     line = ctxt->input->line;
9018     col = ctxt->input->col;
9019     if (*in != '"' && *in != '\'') {
9020         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9021         return (NULL);
9022     }
9023     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9024 
9025     /*
9026      * try to handle in this routine the most common case where no
9027      * allocation of a new string is required and where content is
9028      * pure ASCII.
9029      */
9030     limit = *in++;
9031     col++;
9032     end = ctxt->input->end;
9033     start = in;
9034     if (in >= end) {
9035         const xmlChar *oldbase = ctxt->input->base;
9036 	GROW;
9037 	if (oldbase != ctxt->input->base) {
9038 	    long delta = ctxt->input->base - oldbase;
9039 	    start = start + delta;
9040 	    in = in + delta;
9041 	}
9042 	end = ctxt->input->end;
9043     }
9044     if (normalize) {
9045         /*
9046 	 * Skip any leading spaces
9047 	 */
9048 	while ((in < end) && (*in != limit) &&
9049 	       ((*in == 0x20) || (*in == 0x9) ||
9050 	        (*in == 0xA) || (*in == 0xD))) {
9051 	    if (*in == 0xA) {
9052 	        line++; col = 1;
9053 	    } else {
9054 	        col++;
9055 	    }
9056 	    in++;
9057 	    start = in;
9058 	    if (in >= end) {
9059 		const xmlChar *oldbase = ctxt->input->base;
9060 		GROW;
9061                 if (ctxt->instate == XML_PARSER_EOF)
9062                     return(NULL);
9063 		if (oldbase != ctxt->input->base) {
9064 		    long delta = ctxt->input->base - oldbase;
9065 		    start = start + delta;
9066 		    in = in + delta;
9067 		}
9068 		end = ctxt->input->end;
9069                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9070                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9071                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9072                                    "AttValue length too long\n");
9073                     return(NULL);
9074                 }
9075 	    }
9076 	}
9077 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9078 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9079 	    col++;
9080 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9081 	    if (in >= end) {
9082 		const xmlChar *oldbase = ctxt->input->base;
9083 		GROW;
9084                 if (ctxt->instate == XML_PARSER_EOF)
9085                     return(NULL);
9086 		if (oldbase != ctxt->input->base) {
9087 		    long delta = ctxt->input->base - oldbase;
9088 		    start = start + delta;
9089 		    in = in + delta;
9090 		}
9091 		end = ctxt->input->end;
9092                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9093                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9094                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9095                                    "AttValue length too long\n");
9096                     return(NULL);
9097                 }
9098 	    }
9099 	}
9100 	last = in;
9101 	/*
9102 	 * skip the trailing blanks
9103 	 */
9104 	while ((last[-1] == 0x20) && (last > start)) last--;
9105 	while ((in < end) && (*in != limit) &&
9106 	       ((*in == 0x20) || (*in == 0x9) ||
9107 	        (*in == 0xA) || (*in == 0xD))) {
9108 	    if (*in == 0xA) {
9109 	        line++, col = 1;
9110 	    } else {
9111 	        col++;
9112 	    }
9113 	    in++;
9114 	    if (in >= end) {
9115 		const xmlChar *oldbase = ctxt->input->base;
9116 		GROW;
9117                 if (ctxt->instate == XML_PARSER_EOF)
9118                     return(NULL);
9119 		if (oldbase != ctxt->input->base) {
9120 		    long delta = ctxt->input->base - oldbase;
9121 		    start = start + delta;
9122 		    in = in + delta;
9123 		    last = last + delta;
9124 		}
9125 		end = ctxt->input->end;
9126                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9127                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9128                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129                                    "AttValue length too long\n");
9130                     return(NULL);
9131                 }
9132 	    }
9133 	}
9134         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9135             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9136             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9137                            "AttValue length too long\n");
9138             return(NULL);
9139         }
9140 	if (*in != limit) goto need_complex;
9141     } else {
9142 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9143 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9144 	    in++;
9145 	    col++;
9146 	    if (in >= end) {
9147 		const xmlChar *oldbase = ctxt->input->base;
9148 		GROW;
9149                 if (ctxt->instate == XML_PARSER_EOF)
9150                     return(NULL);
9151 		if (oldbase != ctxt->input->base) {
9152 		    long delta = ctxt->input->base - oldbase;
9153 		    start = start + delta;
9154 		    in = in + delta;
9155 		}
9156 		end = ctxt->input->end;
9157                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9158                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9159                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160                                    "AttValue length too long\n");
9161                     return(NULL);
9162                 }
9163 	    }
9164 	}
9165 	last = in;
9166         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9167             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9168             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9169                            "AttValue length too long\n");
9170             return(NULL);
9171         }
9172 	if (*in != limit) goto need_complex;
9173     }
9174     in++;
9175     col++;
9176     if (len != NULL) {
9177         *len = last - start;
9178         ret = (xmlChar *) start;
9179     } else {
9180         if (alloc) *alloc = 1;
9181         ret = xmlStrndup(start, last - start);
9182     }
9183     CUR_PTR = in;
9184     ctxt->input->line = line;
9185     ctxt->input->col = col;
9186     if (alloc) *alloc = 0;
9187     return ret;
9188 need_complex:
9189     if (alloc) *alloc = 1;
9190     return xmlParseAttValueComplex(ctxt, len, normalize);
9191 }
9192 
9193 /**
9194  * xmlParseAttribute2:
9195  * @ctxt:  an XML parser context
9196  * @pref:  the element prefix
9197  * @elem:  the element name
9198  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9199  * @value:  a xmlChar ** used to store the value of the attribute
9200  * @len:  an int * to save the length of the attribute
9201  * @alloc:  an int * to indicate if the attribute was allocated
9202  *
9203  * parse an attribute in the new SAX2 framework.
9204  *
9205  * Returns the attribute name, and the value in *value, .
9206  */
9207 
9208 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9209 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9210                    const xmlChar * pref, const xmlChar * elem,
9211                    const xmlChar ** prefix, xmlChar ** value,
9212                    int *len, int *alloc)
9213 {
9214     const xmlChar *name;
9215     xmlChar *val, *internal_val = NULL;
9216     int normalize = 0;
9217 
9218     *value = NULL;
9219     GROW;
9220     name = xmlParseQName(ctxt, prefix);
9221     if (name == NULL) {
9222         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9223                        "error parsing attribute name\n");
9224         return (NULL);
9225     }
9226 
9227     /*
9228      * get the type if needed
9229      */
9230     if (ctxt->attsSpecial != NULL) {
9231         int type;
9232 
9233         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9234                                             pref, elem, *prefix, name);
9235         if (type != 0)
9236             normalize = 1;
9237     }
9238 
9239     /*
9240      * read the value
9241      */
9242     SKIP_BLANKS;
9243     if (RAW == '=') {
9244         NEXT;
9245         SKIP_BLANKS;
9246         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9247 	if (normalize) {
9248 	    /*
9249 	     * Sometimes a second normalisation pass for spaces is needed
9250 	     * but that only happens if charrefs or entities refernces
9251 	     * have been used in the attribute value, i.e. the attribute
9252 	     * value have been extracted in an allocated string already.
9253 	     */
9254 	    if (*alloc) {
9255 	        const xmlChar *val2;
9256 
9257 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9258 		if ((val2 != NULL) && (val2 != val)) {
9259 		    xmlFree(val);
9260 		    val = (xmlChar *) val2;
9261 		}
9262 	    }
9263 	}
9264         ctxt->instate = XML_PARSER_CONTENT;
9265     } else {
9266         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9267                           "Specification mandate value for attribute %s\n",
9268                           name);
9269         return (NULL);
9270     }
9271 
9272     if (*prefix == ctxt->str_xml) {
9273         /*
9274          * Check that xml:lang conforms to the specification
9275          * No more registered as an error, just generate a warning now
9276          * since this was deprecated in XML second edition
9277          */
9278         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9279             internal_val = xmlStrndup(val, *len);
9280             if (!xmlCheckLanguageID(internal_val)) {
9281                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9282                               "Malformed value for xml:lang : %s\n",
9283                               internal_val, NULL);
9284             }
9285         }
9286 
9287         /*
9288          * Check that xml:space conforms to the specification
9289          */
9290         if (xmlStrEqual(name, BAD_CAST "space")) {
9291             internal_val = xmlStrndup(val, *len);
9292             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9293                 *(ctxt->space) = 0;
9294             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9295                 *(ctxt->space) = 1;
9296             else {
9297                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9298                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9299                               internal_val, NULL);
9300             }
9301         }
9302         if (internal_val) {
9303             xmlFree(internal_val);
9304         }
9305     }
9306 
9307     *value = val;
9308     return (name);
9309 }
9310 /**
9311  * xmlParseStartTag2:
9312  * @ctxt:  an XML parser context
9313  *
9314  * parse a start of tag either for rule element or
9315  * EmptyElement. In both case we don't parse the tag closing chars.
9316  * This routine is called when running SAX2 parsing
9317  *
9318  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9319  *
9320  * [ WFC: Unique Att Spec ]
9321  * No attribute name may appear more than once in the same start-tag or
9322  * empty-element tag.
9323  *
9324  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9325  *
9326  * [ WFC: Unique Att Spec ]
9327  * No attribute name may appear more than once in the same start-tag or
9328  * empty-element tag.
9329  *
9330  * With namespace:
9331  *
9332  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9333  *
9334  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9335  *
9336  * Returns the element name parsed
9337  */
9338 
9339 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9340 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9341                   const xmlChar **URI, int *tlen) {
9342     const xmlChar *localname;
9343     const xmlChar *prefix;
9344     const xmlChar *attname;
9345     const xmlChar *aprefix;
9346     const xmlChar *nsname;
9347     xmlChar *attvalue;
9348     const xmlChar **atts = ctxt->atts;
9349     int maxatts = ctxt->maxatts;
9350     int nratts, nbatts, nbdef;
9351     int i, j, nbNs, attval, oldline, oldcol, inputNr;
9352     const xmlChar *base;
9353     unsigned long cur;
9354     int nsNr = ctxt->nsNr;
9355 
9356     if (RAW != '<') return(NULL);
9357     NEXT1;
9358 
9359     /*
9360      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9361      *       point since the attribute values may be stored as pointers to
9362      *       the buffer and calling SHRINK would destroy them !
9363      *       The Shrinking is only possible once the full set of attribute
9364      *       callbacks have been done.
9365      */
9366 reparse:
9367     SHRINK;
9368     base = ctxt->input->base;
9369     cur = ctxt->input->cur - ctxt->input->base;
9370     inputNr = ctxt->inputNr;
9371     oldline = ctxt->input->line;
9372     oldcol = ctxt->input->col;
9373     nbatts = 0;
9374     nratts = 0;
9375     nbdef = 0;
9376     nbNs = 0;
9377     attval = 0;
9378     /* Forget any namespaces added during an earlier parse of this element. */
9379     ctxt->nsNr = nsNr;
9380 
9381     localname = xmlParseQName(ctxt, &prefix);
9382     if (localname == NULL) {
9383 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9384 		       "StartTag: invalid element name\n");
9385         return(NULL);
9386     }
9387     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9388 
9389     /*
9390      * Now parse the attributes, it ends up with the ending
9391      *
9392      * (S Attribute)* S?
9393      */
9394     SKIP_BLANKS;
9395     GROW;
9396     if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9397         goto base_changed;
9398 
9399     while (((RAW != '>') &&
9400 	   ((RAW != '/') || (NXT(1) != '>')) &&
9401 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9402 	const xmlChar *q = CUR_PTR;
9403 	unsigned int cons = ctxt->input->consumed;
9404 	int len = -1, alloc = 0;
9405 
9406 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9407 	                             &aprefix, &attvalue, &len, &alloc);
9408 	if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
9409 	    if ((attvalue != NULL) && (alloc != 0))
9410 	        xmlFree(attvalue);
9411 	    attvalue = NULL;
9412 	    goto base_changed;
9413 	}
9414         if ((attname != NULL) && (attvalue != NULL)) {
9415 	    if (len < 0) len = xmlStrlen(attvalue);
9416             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9417 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9418 		xmlURIPtr uri;
9419 
9420                 if (URL == NULL) {
9421 		    xmlErrMemory(ctxt, "dictionary allocation failure");
9422 		    if ((attvalue != NULL) && (alloc != 0))
9423 			xmlFree(attvalue);
9424 		    return(NULL);
9425 		}
9426                 if (*URL != 0) {
9427 		    uri = xmlParseURI((const char *) URL);
9428 		    if (uri == NULL) {
9429 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9430 			         "xmlns: '%s' is not a valid URI\n",
9431 					   URL, NULL, NULL);
9432 		    } else {
9433 			if (uri->scheme == NULL) {
9434 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9435 				      "xmlns: URI %s is not absolute\n",
9436 				      URL, NULL, NULL);
9437 			}
9438 			xmlFreeURI(uri);
9439 		    }
9440 		    if (URL == ctxt->str_xml_ns) {
9441 			if (attname != ctxt->str_xml) {
9442 			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9443 			 "xml namespace URI cannot be the default namespace\n",
9444 				     NULL, NULL, NULL);
9445 			}
9446 			goto skip_default_ns;
9447 		    }
9448 		    if ((len == 29) &&
9449 			(xmlStrEqual(URL,
9450 				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9451 			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9452 			     "reuse of the xmlns namespace name is forbidden\n",
9453 				 NULL, NULL, NULL);
9454 			goto skip_default_ns;
9455 		    }
9456 		}
9457 		/*
9458 		 * check that it's not a defined namespace
9459 		 */
9460 		for (j = 1;j <= nbNs;j++)
9461 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9462 			break;
9463 		if (j <= nbNs)
9464 		    xmlErrAttributeDup(ctxt, NULL, attname);
9465 		else
9466 		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9467 skip_default_ns:
9468 		if (alloc != 0) xmlFree(attvalue);
9469 		if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9470 		    break;
9471 		if (!IS_BLANK_CH(RAW)) {
9472 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9473 				   "attributes construct error\n");
9474 		    break;
9475 		}
9476 		SKIP_BLANKS;
9477 		continue;
9478 	    }
9479             if (aprefix == ctxt->str_xmlns) {
9480 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9481 		xmlURIPtr uri;
9482 
9483                 if (attname == ctxt->str_xml) {
9484 		    if (URL != ctxt->str_xml_ns) {
9485 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9486 			         "xml namespace prefix mapped to wrong URI\n",
9487 			         NULL, NULL, NULL);
9488 		    }
9489 		    /*
9490 		     * Do not keep a namespace definition node
9491 		     */
9492 		    goto skip_ns;
9493 		}
9494                 if (URL == ctxt->str_xml_ns) {
9495 		    if (attname != ctxt->str_xml) {
9496 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9497 			         "xml namespace URI mapped to wrong prefix\n",
9498 			         NULL, NULL, NULL);
9499 		    }
9500 		    goto skip_ns;
9501 		}
9502                 if (attname == ctxt->str_xmlns) {
9503 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9504 			     "redefinition of the xmlns prefix is forbidden\n",
9505 			     NULL, NULL, NULL);
9506 		    goto skip_ns;
9507 		}
9508 		if ((len == 29) &&
9509 		    (xmlStrEqual(URL,
9510 		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9511 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9512 			     "reuse of the xmlns namespace name is forbidden\n",
9513 			     NULL, NULL, NULL);
9514 		    goto skip_ns;
9515 		}
9516 		if ((URL == NULL) || (URL[0] == 0)) {
9517 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9518 		             "xmlns:%s: Empty XML namespace is not allowed\n",
9519 			          attname, NULL, NULL);
9520 		    goto skip_ns;
9521 		} else {
9522 		    uri = xmlParseURI((const char *) URL);
9523 		    if (uri == NULL) {
9524 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9525 			     "xmlns:%s: '%s' is not a valid URI\n",
9526 					   attname, URL, NULL);
9527 		    } else {
9528 			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9529 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9530 				      "xmlns:%s: URI %s is not absolute\n",
9531 				      attname, URL, NULL);
9532 			}
9533 			xmlFreeURI(uri);
9534 		    }
9535 		}
9536 
9537 		/*
9538 		 * check that it's not a defined namespace
9539 		 */
9540 		for (j = 1;j <= nbNs;j++)
9541 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9542 			break;
9543 		if (j <= nbNs)
9544 		    xmlErrAttributeDup(ctxt, aprefix, attname);
9545 		else
9546 		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9547 skip_ns:
9548 		if (alloc != 0) xmlFree(attvalue);
9549 		if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9550 		    break;
9551 		if (!IS_BLANK_CH(RAW)) {
9552 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9553 				   "attributes construct error\n");
9554 		    break;
9555 		}
9556 		SKIP_BLANKS;
9557 		if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9558 		    goto base_changed;
9559 		continue;
9560 	    }
9561 
9562 	    /*
9563 	     * Add the pair to atts
9564 	     */
9565 	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9566 	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9567 		    if (attvalue[len] == 0)
9568 			xmlFree(attvalue);
9569 		    goto failed;
9570 		}
9571 	        maxatts = ctxt->maxatts;
9572 		atts = ctxt->atts;
9573 	    }
9574 	    ctxt->attallocs[nratts++] = alloc;
9575 	    atts[nbatts++] = attname;
9576 	    atts[nbatts++] = aprefix;
9577 	    atts[nbatts++] = NULL; /* the URI will be fetched later */
9578 	    atts[nbatts++] = attvalue;
9579 	    attvalue += len;
9580 	    atts[nbatts++] = attvalue;
9581 	    /*
9582 	     * tag if some deallocation is needed
9583 	     */
9584 	    if (alloc != 0) attval = 1;
9585 	} else {
9586 	    if ((attvalue != NULL) && (attvalue[len] == 0))
9587 		xmlFree(attvalue);
9588 	}
9589 
9590 failed:
9591 
9592 	GROW
9593         if (ctxt->instate == XML_PARSER_EOF)
9594             break;
9595 	if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9596 	    goto base_changed;
9597 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9598 	    break;
9599 	if (!IS_BLANK_CH(RAW)) {
9600 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9601 			   "attributes construct error\n");
9602 	    break;
9603 	}
9604 	SKIP_BLANKS;
9605         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9606             (attname == NULL) && (attvalue == NULL)) {
9607 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9608 	         "xmlParseStartTag: problem parsing attributes\n");
9609 	    break;
9610 	}
9611         GROW;
9612 	if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9613 	    goto base_changed;
9614     }
9615 
9616     /*
9617      * The attributes defaulting
9618      */
9619     if (ctxt->attsDefault != NULL) {
9620         xmlDefAttrsPtr defaults;
9621 
9622 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9623 	if (defaults != NULL) {
9624 	    for (i = 0;i < defaults->nbAttrs;i++) {
9625 	        attname = defaults->values[5 * i];
9626 		aprefix = defaults->values[5 * i + 1];
9627 
9628                 /*
9629 		 * special work for namespaces defaulted defs
9630 		 */
9631 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9632 		    /*
9633 		     * check that it's not a defined namespace
9634 		     */
9635 		    for (j = 1;j <= nbNs;j++)
9636 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9637 			    break;
9638 	            if (j <= nbNs) continue;
9639 
9640 		    nsname = xmlGetNamespace(ctxt, NULL);
9641 		    if (nsname != defaults->values[5 * i + 2]) {
9642 			if (nsPush(ctxt, NULL,
9643 			           defaults->values[5 * i + 2]) > 0)
9644 			    nbNs++;
9645 		    }
9646 		} else if (aprefix == ctxt->str_xmlns) {
9647 		    /*
9648 		     * check that it's not a defined namespace
9649 		     */
9650 		    for (j = 1;j <= nbNs;j++)
9651 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9652 			    break;
9653 	            if (j <= nbNs) continue;
9654 
9655 		    nsname = xmlGetNamespace(ctxt, attname);
9656 		    if (nsname != defaults->values[2]) {
9657 			if (nsPush(ctxt, attname,
9658 			           defaults->values[5 * i + 2]) > 0)
9659 			    nbNs++;
9660 		    }
9661 		} else {
9662 		    /*
9663 		     * check that it's not a defined attribute
9664 		     */
9665 		    for (j = 0;j < nbatts;j+=5) {
9666 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9667 			    break;
9668 		    }
9669 		    if (j < nbatts) continue;
9670 
9671 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9672 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9673 			    return(NULL);
9674 			}
9675 			maxatts = ctxt->maxatts;
9676 			atts = ctxt->atts;
9677 		    }
9678 		    atts[nbatts++] = attname;
9679 		    atts[nbatts++] = aprefix;
9680 		    if (aprefix == NULL)
9681 			atts[nbatts++] = NULL;
9682 		    else
9683 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9684 		    atts[nbatts++] = defaults->values[5 * i + 2];
9685 		    atts[nbatts++] = defaults->values[5 * i + 3];
9686 		    if ((ctxt->standalone == 1) &&
9687 		        (defaults->values[5 * i + 4] != NULL)) {
9688 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9689 	  "standalone: attribute %s on %s defaulted from external subset\n",
9690 	                                 attname, localname);
9691 		    }
9692 		    nbdef++;
9693 		}
9694 	    }
9695 	}
9696     }
9697 
9698     /*
9699      * The attributes checkings
9700      */
9701     for (i = 0; i < nbatts;i += 5) {
9702         /*
9703 	* The default namespace does not apply to attribute names.
9704 	*/
9705 	if (atts[i + 1] != NULL) {
9706 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9707 	    if (nsname == NULL) {
9708 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9709 		    "Namespace prefix %s for %s on %s is not defined\n",
9710 		    atts[i + 1], atts[i], localname);
9711 	    }
9712 	    atts[i + 2] = nsname;
9713 	} else
9714 	    nsname = NULL;
9715 	/*
9716 	 * [ WFC: Unique Att Spec ]
9717 	 * No attribute name may appear more than once in the same
9718 	 * start-tag or empty-element tag.
9719 	 * As extended by the Namespace in XML REC.
9720 	 */
9721         for (j = 0; j < i;j += 5) {
9722 	    if (atts[i] == atts[j]) {
9723 	        if (atts[i+1] == atts[j+1]) {
9724 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9725 		    break;
9726 		}
9727 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9728 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9729 			     "Namespaced Attribute %s in '%s' redefined\n",
9730 			     atts[i], nsname, NULL);
9731 		    break;
9732 		}
9733 	    }
9734 	}
9735     }
9736 
9737     nsname = xmlGetNamespace(ctxt, prefix);
9738     if ((prefix != NULL) && (nsname == NULL)) {
9739 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9740 	         "Namespace prefix %s on %s is not defined\n",
9741 		 prefix, localname, NULL);
9742     }
9743     *pref = prefix;
9744     *URI = nsname;
9745 
9746     /*
9747      * SAX: Start of Element !
9748      */
9749     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9750 	(!ctxt->disableSAX)) {
9751 	if (nbNs > 0)
9752 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9753 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9754 			  nbatts / 5, nbdef, atts);
9755 	else
9756 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9757 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9758     }
9759 
9760     /*
9761      * Free up attribute allocated strings if needed
9762      */
9763     if (attval != 0) {
9764 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9765 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9766 	        xmlFree((xmlChar *) atts[i]);
9767     }
9768 
9769     return(localname);
9770 
9771 base_changed:
9772     /*
9773      * the attribute strings are valid iif the base didn't changed
9774      */
9775     if (attval != 0) {
9776 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9777 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9778 	        xmlFree((xmlChar *) atts[i]);
9779     }
9780 
9781     /*
9782      * We can't switch from one entity to another in the middle
9783      * of a start tag
9784      */
9785     if (inputNr != ctxt->inputNr) {
9786         xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9787 		    "Start tag doesn't start and stop in the same entity\n");
9788 	return(NULL);
9789     }
9790 
9791     ctxt->input->cur = ctxt->input->base + cur;
9792     ctxt->input->line = oldline;
9793     ctxt->input->col = oldcol;
9794     if (ctxt->wellFormed == 1) {
9795 	goto reparse;
9796     }
9797     return(NULL);
9798 }
9799 
9800 /**
9801  * xmlParseEndTag2:
9802  * @ctxt:  an XML parser context
9803  * @line:  line of the start tag
9804  * @nsNr:  number of namespaces on the start tag
9805  *
9806  * parse an end of tag
9807  *
9808  * [42] ETag ::= '</' Name S? '>'
9809  *
9810  * With namespace
9811  *
9812  * [NS 9] ETag ::= '</' QName S? '>'
9813  */
9814 
9815 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9816 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9817                 const xmlChar *URI, int line, int nsNr, int tlen) {
9818     const xmlChar *name;
9819 
9820     GROW;
9821     if ((RAW != '<') || (NXT(1) != '/')) {
9822 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9823 	return;
9824     }
9825     SKIP(2);
9826 
9827     if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9828         if (ctxt->input->cur[tlen] == '>') {
9829 	    ctxt->input->cur += tlen + 1;
9830 	    ctxt->input->col += tlen + 1;
9831 	    goto done;
9832 	}
9833 	ctxt->input->cur += tlen;
9834 	ctxt->input->col += tlen;
9835 	name = (xmlChar*)1;
9836     } else {
9837 	if (prefix == NULL)
9838 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9839 	else
9840 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9841     }
9842 
9843     /*
9844      * We should definitely be at the ending "S? '>'" part
9845      */
9846     GROW;
9847     if (ctxt->instate == XML_PARSER_EOF)
9848         return;
9849     SKIP_BLANKS;
9850     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9851 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9852     } else
9853 	NEXT1;
9854 
9855     /*
9856      * [ WFC: Element Type Match ]
9857      * The Name in an element's end-tag must match the element type in the
9858      * start-tag.
9859      *
9860      */
9861     if (name != (xmlChar*)1) {
9862         if (name == NULL) name = BAD_CAST "unparseable";
9863         if ((line == 0) && (ctxt->node != NULL))
9864             line = ctxt->node->line;
9865         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9866 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9867 		                ctxt->name, line, name);
9868     }
9869 
9870     /*
9871      * SAX: End of Tag
9872      */
9873 done:
9874     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9875 	(!ctxt->disableSAX))
9876 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9877 
9878     spacePop(ctxt);
9879     if (nsNr != 0)
9880 	nsPop(ctxt, nsNr);
9881     return;
9882 }
9883 
9884 /**
9885  * xmlParseCDSect:
9886  * @ctxt:  an XML parser context
9887  *
9888  * Parse escaped pure raw content.
9889  *
9890  * [18] CDSect ::= CDStart CData CDEnd
9891  *
9892  * [19] CDStart ::= '<![CDATA['
9893  *
9894  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9895  *
9896  * [21] CDEnd ::= ']]>'
9897  */
9898 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9899 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9900     xmlChar *buf = NULL;
9901     int len = 0;
9902     int size = XML_PARSER_BUFFER_SIZE;
9903     int r, rl;
9904     int	s, sl;
9905     int cur, l;
9906     int count = 0;
9907 
9908     /* Check 2.6.0 was NXT(0) not RAW */
9909     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9910 	SKIP(9);
9911     } else
9912         return;
9913 
9914     ctxt->instate = XML_PARSER_CDATA_SECTION;
9915     r = CUR_CHAR(rl);
9916     if (!IS_CHAR(r)) {
9917 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9918 	ctxt->instate = XML_PARSER_CONTENT;
9919         return;
9920     }
9921     NEXTL(rl);
9922     s = CUR_CHAR(sl);
9923     if (!IS_CHAR(s)) {
9924 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9925 	ctxt->instate = XML_PARSER_CONTENT;
9926         return;
9927     }
9928     NEXTL(sl);
9929     cur = CUR_CHAR(l);
9930     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9931     if (buf == NULL) {
9932 	xmlErrMemory(ctxt, NULL);
9933 	return;
9934     }
9935     while (IS_CHAR(cur) &&
9936            ((r != ']') || (s != ']') || (cur != '>'))) {
9937 	if (len + 5 >= size) {
9938 	    xmlChar *tmp;
9939 
9940             if ((size > XML_MAX_TEXT_LENGTH) &&
9941                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9942                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9943                              "CData section too big found", NULL);
9944                 xmlFree (buf);
9945                 return;
9946             }
9947 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9948 	    if (tmp == NULL) {
9949 	        xmlFree(buf);
9950 		xmlErrMemory(ctxt, NULL);
9951 		return;
9952 	    }
9953 	    buf = tmp;
9954 	    size *= 2;
9955 	}
9956 	COPY_BUF(rl,buf,len,r);
9957 	r = s;
9958 	rl = sl;
9959 	s = cur;
9960 	sl = l;
9961 	count++;
9962 	if (count > 50) {
9963 	    GROW;
9964             if (ctxt->instate == XML_PARSER_EOF) {
9965 		xmlFree(buf);
9966 		return;
9967             }
9968 	    count = 0;
9969 	}
9970 	NEXTL(l);
9971 	cur = CUR_CHAR(l);
9972     }
9973     buf[len] = 0;
9974     ctxt->instate = XML_PARSER_CONTENT;
9975     if (cur != '>') {
9976 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9977 	                     "CData section not finished\n%.50s\n", buf);
9978 	xmlFree(buf);
9979         return;
9980     }
9981     NEXTL(l);
9982 
9983     /*
9984      * OK the buffer is to be consumed as cdata.
9985      */
9986     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9987 	if (ctxt->sax->cdataBlock != NULL)
9988 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9989 	else if (ctxt->sax->characters != NULL)
9990 	    ctxt->sax->characters(ctxt->userData, buf, len);
9991     }
9992     xmlFree(buf);
9993 }
9994 
9995 /**
9996  * xmlParseContent:
9997  * @ctxt:  an XML parser context
9998  *
9999  * Parse a content:
10000  *
10001  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10002  */
10003 
10004 void
xmlParseContent(xmlParserCtxtPtr ctxt)10005 xmlParseContent(xmlParserCtxtPtr ctxt) {
10006     GROW;
10007     while ((RAW != 0) &&
10008 	   ((RAW != '<') || (NXT(1) != '/')) &&
10009 	   (ctxt->instate != XML_PARSER_EOF)) {
10010 	const xmlChar *test = CUR_PTR;
10011 	unsigned int cons = ctxt->input->consumed;
10012 	const xmlChar *cur = ctxt->input->cur;
10013 
10014 	/*
10015 	 * First case : a Processing Instruction.
10016 	 */
10017 	if ((*cur == '<') && (cur[1] == '?')) {
10018 	    xmlParsePI(ctxt);
10019 	}
10020 
10021 	/*
10022 	 * Second case : a CDSection
10023 	 */
10024 	/* 2.6.0 test was *cur not RAW */
10025 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10026 	    xmlParseCDSect(ctxt);
10027 	}
10028 
10029 	/*
10030 	 * Third case :  a comment
10031 	 */
10032 	else if ((*cur == '<') && (NXT(1) == '!') &&
10033 		 (NXT(2) == '-') && (NXT(3) == '-')) {
10034 	    xmlParseComment(ctxt);
10035 	    ctxt->instate = XML_PARSER_CONTENT;
10036 	}
10037 
10038 	/*
10039 	 * Fourth case :  a sub-element.
10040 	 */
10041 	else if (*cur == '<') {
10042 	    xmlParseElement(ctxt);
10043 	}
10044 
10045 	/*
10046 	 * Fifth case : a reference. If if has not been resolved,
10047 	 *    parsing returns it's Name, create the node
10048 	 */
10049 
10050 	else if (*cur == '&') {
10051 	    xmlParseReference(ctxt);
10052 	}
10053 
10054 	/*
10055 	 * Last case, text. Note that References are handled directly.
10056 	 */
10057 	else {
10058 	    xmlParseCharData(ctxt, 0);
10059 	}
10060 
10061 	GROW;
10062 	/*
10063 	 * Pop-up of finished entities.
10064 	 */
10065 	while ((RAW == 0) && (ctxt->inputNr > 1))
10066 	    xmlPopInput(ctxt);
10067 	SHRINK;
10068 
10069 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10070 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10071 	                "detected an error in element content\n");
10072 	    xmlHaltParser(ctxt);
10073             break;
10074 	}
10075     }
10076 }
10077 
10078 /**
10079  * xmlParseElement:
10080  * @ctxt:  an XML parser context
10081  *
10082  * parse an XML element, this is highly recursive
10083  *
10084  * [39] element ::= EmptyElemTag | STag content ETag
10085  *
10086  * [ WFC: Element Type Match ]
10087  * The Name in an element's end-tag must match the element type in the
10088  * start-tag.
10089  *
10090  */
10091 
10092 void
xmlParseElement(xmlParserCtxtPtr ctxt)10093 xmlParseElement(xmlParserCtxtPtr ctxt) {
10094     const xmlChar *name;
10095     const xmlChar *prefix = NULL;
10096     const xmlChar *URI = NULL;
10097     xmlParserNodeInfo node_info;
10098     int line, tlen = 0;
10099     xmlNodePtr ret;
10100     int nsNr = ctxt->nsNr;
10101 
10102     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10103         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10104 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10105 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10106 			  xmlParserMaxDepth);
10107 	xmlHaltParser(ctxt);
10108 	return;
10109     }
10110 
10111     /* Capture start position */
10112     if (ctxt->record_info) {
10113         node_info.begin_pos = ctxt->input->consumed +
10114                           (CUR_PTR - ctxt->input->base);
10115 	node_info.begin_line = ctxt->input->line;
10116     }
10117 
10118     if (ctxt->spaceNr == 0)
10119 	spacePush(ctxt, -1);
10120     else if (*ctxt->space == -2)
10121 	spacePush(ctxt, -1);
10122     else
10123 	spacePush(ctxt, *ctxt->space);
10124 
10125     line = ctxt->input->line;
10126 #ifdef LIBXML_SAX1_ENABLED
10127     if (ctxt->sax2)
10128 #endif /* LIBXML_SAX1_ENABLED */
10129         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10130 #ifdef LIBXML_SAX1_ENABLED
10131     else
10132 	name = xmlParseStartTag(ctxt);
10133 #endif /* LIBXML_SAX1_ENABLED */
10134     if (ctxt->instate == XML_PARSER_EOF)
10135 	return;
10136     if (name == NULL) {
10137 	spacePop(ctxt);
10138         return;
10139     }
10140     namePush(ctxt, name);
10141     ret = ctxt->node;
10142 
10143 #ifdef LIBXML_VALID_ENABLED
10144     /*
10145      * [ VC: Root Element Type ]
10146      * The Name in the document type declaration must match the element
10147      * type of the root element.
10148      */
10149     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10150         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10151         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10152 #endif /* LIBXML_VALID_ENABLED */
10153 
10154     /*
10155      * Check for an Empty Element.
10156      */
10157     if ((RAW == '/') && (NXT(1) == '>')) {
10158         SKIP(2);
10159 	if (ctxt->sax2) {
10160 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10161 		(!ctxt->disableSAX))
10162 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10163 #ifdef LIBXML_SAX1_ENABLED
10164 	} else {
10165 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10166 		(!ctxt->disableSAX))
10167 		ctxt->sax->endElement(ctxt->userData, name);
10168 #endif /* LIBXML_SAX1_ENABLED */
10169 	}
10170 	namePop(ctxt);
10171 	spacePop(ctxt);
10172 	if (nsNr != ctxt->nsNr)
10173 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10174 	if ( ret != NULL && ctxt->record_info ) {
10175 	   node_info.end_pos = ctxt->input->consumed +
10176 			      (CUR_PTR - ctxt->input->base);
10177 	   node_info.end_line = ctxt->input->line;
10178 	   node_info.node = ret;
10179 	   xmlParserAddNodeInfo(ctxt, &node_info);
10180 	}
10181 	return;
10182     }
10183     if (RAW == '>') {
10184         NEXT1;
10185     } else {
10186         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10187 		     "Couldn't find end of Start Tag %s line %d\n",
10188 		                name, line, NULL);
10189 
10190 	/*
10191 	 * end of parsing of this node.
10192 	 */
10193 	nodePop(ctxt);
10194 	namePop(ctxt);
10195 	spacePop(ctxt);
10196 	if (nsNr != ctxt->nsNr)
10197 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10198 
10199 	/*
10200 	 * Capture end position and add node
10201 	 */
10202 	if ( ret != NULL && ctxt->record_info ) {
10203 	   node_info.end_pos = ctxt->input->consumed +
10204 			      (CUR_PTR - ctxt->input->base);
10205 	   node_info.end_line = ctxt->input->line;
10206 	   node_info.node = ret;
10207 	   xmlParserAddNodeInfo(ctxt, &node_info);
10208 	}
10209 	return;
10210     }
10211 
10212     /*
10213      * Parse the content of the element:
10214      */
10215     xmlParseContent(ctxt);
10216     if (ctxt->instate == XML_PARSER_EOF)
10217 	return;
10218     if (!IS_BYTE_CHAR(RAW)) {
10219         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10220 	 "Premature end of data in tag %s line %d\n",
10221 		                name, line, NULL);
10222 
10223 	/*
10224 	 * end of parsing of this node.
10225 	 */
10226 	nodePop(ctxt);
10227 	namePop(ctxt);
10228 	spacePop(ctxt);
10229 	if (nsNr != ctxt->nsNr)
10230 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10231 	return;
10232     }
10233 
10234     /*
10235      * parse the end of tag: '</' should be here.
10236      */
10237     if (ctxt->sax2) {
10238 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10239 	namePop(ctxt);
10240     }
10241 #ifdef LIBXML_SAX1_ENABLED
10242       else
10243 	xmlParseEndTag1(ctxt, line);
10244 #endif /* LIBXML_SAX1_ENABLED */
10245 
10246     /*
10247      * Capture end position and add node
10248      */
10249     if ( ret != NULL && ctxt->record_info ) {
10250        node_info.end_pos = ctxt->input->consumed +
10251                           (CUR_PTR - ctxt->input->base);
10252        node_info.end_line = ctxt->input->line;
10253        node_info.node = ret;
10254        xmlParserAddNodeInfo(ctxt, &node_info);
10255     }
10256 }
10257 
10258 /**
10259  * xmlParseVersionNum:
10260  * @ctxt:  an XML parser context
10261  *
10262  * parse the XML version value.
10263  *
10264  * [26] VersionNum ::= '1.' [0-9]+
10265  *
10266  * In practice allow [0-9].[0-9]+ at that level
10267  *
10268  * Returns the string giving the XML version number, or NULL
10269  */
10270 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10271 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10272     xmlChar *buf = NULL;
10273     int len = 0;
10274     int size = 10;
10275     xmlChar cur;
10276 
10277     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10278     if (buf == NULL) {
10279 	xmlErrMemory(ctxt, NULL);
10280 	return(NULL);
10281     }
10282     cur = CUR;
10283     if (!((cur >= '0') && (cur <= '9'))) {
10284 	xmlFree(buf);
10285 	return(NULL);
10286     }
10287     buf[len++] = cur;
10288     NEXT;
10289     cur=CUR;
10290     if (cur != '.') {
10291 	xmlFree(buf);
10292 	return(NULL);
10293     }
10294     buf[len++] = cur;
10295     NEXT;
10296     cur=CUR;
10297     while ((cur >= '0') && (cur <= '9')) {
10298 	if (len + 1 >= size) {
10299 	    xmlChar *tmp;
10300 
10301 	    size *= 2;
10302 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10303 	    if (tmp == NULL) {
10304 	        xmlFree(buf);
10305 		xmlErrMemory(ctxt, NULL);
10306 		return(NULL);
10307 	    }
10308 	    buf = tmp;
10309 	}
10310 	buf[len++] = cur;
10311 	NEXT;
10312 	cur=CUR;
10313     }
10314     buf[len] = 0;
10315     return(buf);
10316 }
10317 
10318 /**
10319  * xmlParseVersionInfo:
10320  * @ctxt:  an XML parser context
10321  *
10322  * parse the XML version.
10323  *
10324  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10325  *
10326  * [25] Eq ::= S? '=' S?
10327  *
10328  * Returns the version string, e.g. "1.0"
10329  */
10330 
10331 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10332 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10333     xmlChar *version = NULL;
10334 
10335     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10336 	SKIP(7);
10337 	SKIP_BLANKS;
10338 	if (RAW != '=') {
10339 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10340 	    return(NULL);
10341         }
10342 	NEXT;
10343 	SKIP_BLANKS;
10344 	if (RAW == '"') {
10345 	    NEXT;
10346 	    version = xmlParseVersionNum(ctxt);
10347 	    if (RAW != '"') {
10348 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10349 	    } else
10350 	        NEXT;
10351 	} else if (RAW == '\''){
10352 	    NEXT;
10353 	    version = xmlParseVersionNum(ctxt);
10354 	    if (RAW != '\'') {
10355 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10356 	    } else
10357 	        NEXT;
10358 	} else {
10359 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10360 	}
10361     }
10362     return(version);
10363 }
10364 
10365 /**
10366  * xmlParseEncName:
10367  * @ctxt:  an XML parser context
10368  *
10369  * parse the XML encoding name
10370  *
10371  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10372  *
10373  * Returns the encoding name value or NULL
10374  */
10375 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10376 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10377     xmlChar *buf = NULL;
10378     int len = 0;
10379     int size = 10;
10380     xmlChar cur;
10381 
10382     cur = CUR;
10383     if (((cur >= 'a') && (cur <= 'z')) ||
10384         ((cur >= 'A') && (cur <= 'Z'))) {
10385 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10386 	if (buf == NULL) {
10387 	    xmlErrMemory(ctxt, NULL);
10388 	    return(NULL);
10389 	}
10390 
10391 	buf[len++] = cur;
10392 	NEXT;
10393 	cur = CUR;
10394 	while (((cur >= 'a') && (cur <= 'z')) ||
10395 	       ((cur >= 'A') && (cur <= 'Z')) ||
10396 	       ((cur >= '0') && (cur <= '9')) ||
10397 	       (cur == '.') || (cur == '_') ||
10398 	       (cur == '-')) {
10399 	    if (len + 1 >= size) {
10400 	        xmlChar *tmp;
10401 
10402 		size *= 2;
10403 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10404 		if (tmp == NULL) {
10405 		    xmlErrMemory(ctxt, NULL);
10406 		    xmlFree(buf);
10407 		    return(NULL);
10408 		}
10409 		buf = tmp;
10410 	    }
10411 	    buf[len++] = cur;
10412 	    NEXT;
10413 	    cur = CUR;
10414 	    if (cur == 0) {
10415 	        SHRINK;
10416 		GROW;
10417 		cur = CUR;
10418 	    }
10419         }
10420 	buf[len] = 0;
10421     } else {
10422 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10423     }
10424     return(buf);
10425 }
10426 
10427 /**
10428  * xmlParseEncodingDecl:
10429  * @ctxt:  an XML parser context
10430  *
10431  * parse the XML encoding declaration
10432  *
10433  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10434  *
10435  * this setups the conversion filters.
10436  *
10437  * Returns the encoding value or NULL
10438  */
10439 
10440 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10441 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10442     xmlChar *encoding = NULL;
10443 
10444     SKIP_BLANKS;
10445     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10446 	SKIP(8);
10447 	SKIP_BLANKS;
10448 	if (RAW != '=') {
10449 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10450 	    return(NULL);
10451         }
10452 	NEXT;
10453 	SKIP_BLANKS;
10454 	if (RAW == '"') {
10455 	    NEXT;
10456 	    encoding = xmlParseEncName(ctxt);
10457 	    if (RAW != '"') {
10458 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10459 		xmlFree((xmlChar *) encoding);
10460 		return(NULL);
10461 	    } else
10462 	        NEXT;
10463 	} else if (RAW == '\''){
10464 	    NEXT;
10465 	    encoding = xmlParseEncName(ctxt);
10466 	    if (RAW != '\'') {
10467 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10468 		xmlFree((xmlChar *) encoding);
10469 		return(NULL);
10470 	    } else
10471 	        NEXT;
10472 	} else {
10473 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10474 	}
10475 
10476         /*
10477          * Non standard parsing, allowing the user to ignore encoding
10478          */
10479         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10480 	    xmlFree((xmlChar *) encoding);
10481             return(NULL);
10482 	}
10483 
10484 	/*
10485 	 * UTF-16 encoding stwich has already taken place at this stage,
10486 	 * more over the little-endian/big-endian selection is already done
10487 	 */
10488         if ((encoding != NULL) &&
10489 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10490 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10491 	    /*
10492 	     * If no encoding was passed to the parser, that we are
10493 	     * using UTF-16 and no decoder is present i.e. the
10494 	     * document is apparently UTF-8 compatible, then raise an
10495 	     * encoding mismatch fatal error
10496 	     */
10497 	    if ((ctxt->encoding == NULL) &&
10498 	        (ctxt->input->buf != NULL) &&
10499 	        (ctxt->input->buf->encoder == NULL)) {
10500 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10501 		  "Document labelled UTF-16 but has UTF-8 content\n");
10502 	    }
10503 	    if (ctxt->encoding != NULL)
10504 		xmlFree((xmlChar *) ctxt->encoding);
10505 	    ctxt->encoding = encoding;
10506 	}
10507 	/*
10508 	 * UTF-8 encoding is handled natively
10509 	 */
10510         else if ((encoding != NULL) &&
10511 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10512 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10513 	    if (ctxt->encoding != NULL)
10514 		xmlFree((xmlChar *) ctxt->encoding);
10515 	    ctxt->encoding = encoding;
10516 	}
10517 	else if (encoding != NULL) {
10518 	    xmlCharEncodingHandlerPtr handler;
10519 
10520 	    if (ctxt->input->encoding != NULL)
10521 		xmlFree((xmlChar *) ctxt->input->encoding);
10522 	    ctxt->input->encoding = encoding;
10523 
10524             handler = xmlFindCharEncodingHandler((const char *) encoding);
10525 	    if (handler != NULL) {
10526 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10527 		    /* failed to convert */
10528 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10529 		    return(NULL);
10530 		}
10531 	    } else {
10532 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10533 			"Unsupported encoding %s\n", encoding);
10534 		return(NULL);
10535 	    }
10536 	}
10537     }
10538     return(encoding);
10539 }
10540 
10541 /**
10542  * xmlParseSDDecl:
10543  * @ctxt:  an XML parser context
10544  *
10545  * parse the XML standalone declaration
10546  *
10547  * [32] SDDecl ::= S 'standalone' Eq
10548  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10549  *
10550  * [ VC: Standalone Document Declaration ]
10551  * TODO The standalone document declaration must have the value "no"
10552  * if any external markup declarations contain declarations of:
10553  *  - attributes with default values, if elements to which these
10554  *    attributes apply appear in the document without specifications
10555  *    of values for these attributes, or
10556  *  - entities (other than amp, lt, gt, apos, quot), if references
10557  *    to those entities appear in the document, or
10558  *  - attributes with values subject to normalization, where the
10559  *    attribute appears in the document with a value which will change
10560  *    as a result of normalization, or
10561  *  - element types with element content, if white space occurs directly
10562  *    within any instance of those types.
10563  *
10564  * Returns:
10565  *   1 if standalone="yes"
10566  *   0 if standalone="no"
10567  *  -2 if standalone attribute is missing or invalid
10568  *	  (A standalone value of -2 means that the XML declaration was found,
10569  *	   but no value was specified for the standalone attribute).
10570  */
10571 
10572 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10573 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10574     int standalone = -2;
10575 
10576     SKIP_BLANKS;
10577     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10578 	SKIP(10);
10579         SKIP_BLANKS;
10580 	if (RAW != '=') {
10581 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10582 	    return(standalone);
10583         }
10584 	NEXT;
10585 	SKIP_BLANKS;
10586         if (RAW == '\''){
10587 	    NEXT;
10588 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10589 	        standalone = 0;
10590                 SKIP(2);
10591 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10592 	               (NXT(2) == 's')) {
10593 	        standalone = 1;
10594 		SKIP(3);
10595             } else {
10596 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10597 	    }
10598 	    if (RAW != '\'') {
10599 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10600 	    } else
10601 	        NEXT;
10602 	} else if (RAW == '"'){
10603 	    NEXT;
10604 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10605 	        standalone = 0;
10606 		SKIP(2);
10607 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10608 	               (NXT(2) == 's')) {
10609 	        standalone = 1;
10610                 SKIP(3);
10611             } else {
10612 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10613 	    }
10614 	    if (RAW != '"') {
10615 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10616 	    } else
10617 	        NEXT;
10618 	} else {
10619 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10620         }
10621     }
10622     return(standalone);
10623 }
10624 
10625 /**
10626  * xmlParseXMLDecl:
10627  * @ctxt:  an XML parser context
10628  *
10629  * parse an XML declaration header
10630  *
10631  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10632  */
10633 
10634 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10635 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10636     xmlChar *version;
10637 
10638     /*
10639      * This value for standalone indicates that the document has an
10640      * XML declaration but it does not have a standalone attribute.
10641      * It will be overwritten later if a standalone attribute is found.
10642      */
10643     ctxt->input->standalone = -2;
10644 
10645     /*
10646      * We know that '<?xml' is here.
10647      */
10648     SKIP(5);
10649 
10650     if (!IS_BLANK_CH(RAW)) {
10651 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10652 	               "Blank needed after '<?xml'\n");
10653     }
10654     SKIP_BLANKS;
10655 
10656     /*
10657      * We must have the VersionInfo here.
10658      */
10659     version = xmlParseVersionInfo(ctxt);
10660     if (version == NULL) {
10661 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10662     } else {
10663 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10664 	    /*
10665 	     * Changed here for XML-1.0 5th edition
10666 	     */
10667 	    if (ctxt->options & XML_PARSE_OLD10) {
10668 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10669 			          "Unsupported version '%s'\n",
10670 			          version);
10671 	    } else {
10672 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10673 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10674 		                  "Unsupported version '%s'\n",
10675 				  version, NULL);
10676 		} else {
10677 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10678 				      "Unsupported version '%s'\n",
10679 				      version);
10680 		}
10681 	    }
10682 	}
10683 	if (ctxt->version != NULL)
10684 	    xmlFree((void *) ctxt->version);
10685 	ctxt->version = version;
10686     }
10687 
10688     /*
10689      * We may have the encoding declaration
10690      */
10691     if (!IS_BLANK_CH(RAW)) {
10692         if ((RAW == '?') && (NXT(1) == '>')) {
10693 	    SKIP(2);
10694 	    return;
10695 	}
10696 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10697     }
10698     xmlParseEncodingDecl(ctxt);
10699     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10700          (ctxt->instate == XML_PARSER_EOF)) {
10701 	/*
10702 	 * The XML REC instructs us to stop parsing right here
10703 	 */
10704         return;
10705     }
10706 
10707     /*
10708      * We may have the standalone status.
10709      */
10710     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10711         if ((RAW == '?') && (NXT(1) == '>')) {
10712 	    SKIP(2);
10713 	    return;
10714 	}
10715 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10716     }
10717 
10718     /*
10719      * We can grow the input buffer freely at that point
10720      */
10721     GROW;
10722 
10723     SKIP_BLANKS;
10724     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10725 
10726     SKIP_BLANKS;
10727     if ((RAW == '?') && (NXT(1) == '>')) {
10728         SKIP(2);
10729     } else if (RAW == '>') {
10730         /* Deprecated old WD ... */
10731 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10732 	NEXT;
10733     } else {
10734 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10735 	MOVETO_ENDTAG(CUR_PTR);
10736 	NEXT;
10737     }
10738 }
10739 
10740 /**
10741  * xmlParseMisc:
10742  * @ctxt:  an XML parser context
10743  *
10744  * parse an XML Misc* optional field.
10745  *
10746  * [27] Misc ::= Comment | PI |  S
10747  */
10748 
10749 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10750 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10751     while ((ctxt->instate != XML_PARSER_EOF) &&
10752            (((RAW == '<') && (NXT(1) == '?')) ||
10753             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10754             IS_BLANK_CH(CUR))) {
10755         if ((RAW == '<') && (NXT(1) == '?')) {
10756 	    xmlParsePI(ctxt);
10757 	} else if (IS_BLANK_CH(CUR)) {
10758 	    NEXT;
10759 	} else
10760 	    xmlParseComment(ctxt);
10761     }
10762 }
10763 
10764 /**
10765  * xmlParseDocument:
10766  * @ctxt:  an XML parser context
10767  *
10768  * parse an XML document (and build a tree if using the standard SAX
10769  * interface).
10770  *
10771  * [1] document ::= prolog element Misc*
10772  *
10773  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10774  *
10775  * Returns 0, -1 in case of error. the parser context is augmented
10776  *                as a result of the parsing.
10777  */
10778 
10779 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10780 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10781     xmlChar start[4];
10782     xmlCharEncoding enc;
10783 
10784     xmlInitParser();
10785 
10786     if ((ctxt == NULL) || (ctxt->input == NULL))
10787         return(-1);
10788 
10789     GROW;
10790 
10791     /*
10792      * SAX: detecting the level.
10793      */
10794     xmlDetectSAX2(ctxt);
10795 
10796     /*
10797      * SAX: beginning of the document processing.
10798      */
10799     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10800         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10801     if (ctxt->instate == XML_PARSER_EOF)
10802 	return(-1);
10803 
10804     if ((ctxt->encoding == NULL) &&
10805         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10806 	/*
10807 	 * Get the 4 first bytes and decode the charset
10808 	 * if enc != XML_CHAR_ENCODING_NONE
10809 	 * plug some encoding conversion routines.
10810 	 */
10811 	start[0] = RAW;
10812 	start[1] = NXT(1);
10813 	start[2] = NXT(2);
10814 	start[3] = NXT(3);
10815 	enc = xmlDetectCharEncoding(&start[0], 4);
10816 	if (enc != XML_CHAR_ENCODING_NONE) {
10817 	    xmlSwitchEncoding(ctxt, enc);
10818 	}
10819     }
10820 
10821 
10822     if (CUR == 0) {
10823 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10824 	return(-1);
10825     }
10826 
10827     /*
10828      * Check for the XMLDecl in the Prolog.
10829      * do not GROW here to avoid the detected encoder to decode more
10830      * than just the first line, unless the amount of data is really
10831      * too small to hold "<?xml version="1.0" encoding="foo"
10832      */
10833     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10834        GROW;
10835     }
10836     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10837 
10838 	/*
10839 	 * Note that we will switch encoding on the fly.
10840 	 */
10841 	xmlParseXMLDecl(ctxt);
10842 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10843 	    (ctxt->instate == XML_PARSER_EOF)) {
10844 	    /*
10845 	     * The XML REC instructs us to stop parsing right here
10846 	     */
10847 	    return(-1);
10848 	}
10849 	ctxt->standalone = ctxt->input->standalone;
10850 	SKIP_BLANKS;
10851     } else {
10852 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10853     }
10854     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10855         ctxt->sax->startDocument(ctxt->userData);
10856     if (ctxt->instate == XML_PARSER_EOF)
10857 	return(-1);
10858     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10859         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10860 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10861     }
10862 
10863     /*
10864      * The Misc part of the Prolog
10865      */
10866     GROW;
10867     xmlParseMisc(ctxt);
10868 
10869     /*
10870      * Then possibly doc type declaration(s) and more Misc
10871      * (doctypedecl Misc*)?
10872      */
10873     GROW;
10874     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10875 
10876 	ctxt->inSubset = 1;
10877 	xmlParseDocTypeDecl(ctxt);
10878 	if (RAW == '[') {
10879 	    ctxt->instate = XML_PARSER_DTD;
10880 	    xmlParseInternalSubset(ctxt);
10881 	    if (ctxt->instate == XML_PARSER_EOF)
10882 		return(-1);
10883 	}
10884 
10885 	/*
10886 	 * Create and update the external subset.
10887 	 */
10888 	ctxt->inSubset = 2;
10889 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10890 	    (!ctxt->disableSAX))
10891 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10892 	                              ctxt->extSubSystem, ctxt->extSubURI);
10893 	if (ctxt->instate == XML_PARSER_EOF)
10894 	    return(-1);
10895 	ctxt->inSubset = 0;
10896 
10897         xmlCleanSpecialAttr(ctxt);
10898 
10899 	ctxt->instate = XML_PARSER_PROLOG;
10900 	xmlParseMisc(ctxt);
10901     }
10902 
10903     /*
10904      * Time to start parsing the tree itself
10905      */
10906     GROW;
10907     if (RAW != '<') {
10908 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10909 		       "Start tag expected, '<' not found\n");
10910     } else {
10911 	ctxt->instate = XML_PARSER_CONTENT;
10912 	xmlParseElement(ctxt);
10913 	ctxt->instate = XML_PARSER_EPILOG;
10914 
10915 
10916 	/*
10917 	 * The Misc part at the end
10918 	 */
10919 	xmlParseMisc(ctxt);
10920 
10921 	if (RAW != 0) {
10922 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10923 	}
10924 	ctxt->instate = XML_PARSER_EOF;
10925     }
10926 
10927     /*
10928      * SAX: end of the document processing.
10929      */
10930     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10931         ctxt->sax->endDocument(ctxt->userData);
10932 
10933     /*
10934      * Remove locally kept entity definitions if the tree was not built
10935      */
10936     if ((ctxt->myDoc != NULL) &&
10937 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10938 	xmlFreeDoc(ctxt->myDoc);
10939 	ctxt->myDoc = NULL;
10940     }
10941 
10942     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10943         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10944 	if (ctxt->valid)
10945 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10946 	if (ctxt->nsWellFormed)
10947 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10948 	if (ctxt->options & XML_PARSE_OLD10)
10949 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10950     }
10951     if (! ctxt->wellFormed) {
10952 	ctxt->valid = 0;
10953 	return(-1);
10954     }
10955     return(0);
10956 }
10957 
10958 /**
10959  * xmlParseExtParsedEnt:
10960  * @ctxt:  an XML parser context
10961  *
10962  * parse a general parsed entity
10963  * An external general parsed entity is well-formed if it matches the
10964  * production labeled extParsedEnt.
10965  *
10966  * [78] extParsedEnt ::= TextDecl? content
10967  *
10968  * Returns 0, -1 in case of error. the parser context is augmented
10969  *                as a result of the parsing.
10970  */
10971 
10972 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10973 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10974     xmlChar start[4];
10975     xmlCharEncoding enc;
10976 
10977     if ((ctxt == NULL) || (ctxt->input == NULL))
10978         return(-1);
10979 
10980     xmlDefaultSAXHandlerInit();
10981 
10982     xmlDetectSAX2(ctxt);
10983 
10984     GROW;
10985 
10986     /*
10987      * SAX: beginning of the document processing.
10988      */
10989     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10990         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10991 
10992     /*
10993      * Get the 4 first bytes and decode the charset
10994      * if enc != XML_CHAR_ENCODING_NONE
10995      * plug some encoding conversion routines.
10996      */
10997     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10998 	start[0] = RAW;
10999 	start[1] = NXT(1);
11000 	start[2] = NXT(2);
11001 	start[3] = NXT(3);
11002 	enc = xmlDetectCharEncoding(start, 4);
11003 	if (enc != XML_CHAR_ENCODING_NONE) {
11004 	    xmlSwitchEncoding(ctxt, enc);
11005 	}
11006     }
11007 
11008 
11009     if (CUR == 0) {
11010 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11011     }
11012 
11013     /*
11014      * Check for the XMLDecl in the Prolog.
11015      */
11016     GROW;
11017     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11018 
11019 	/*
11020 	 * Note that we will switch encoding on the fly.
11021 	 */
11022 	xmlParseXMLDecl(ctxt);
11023 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11024 	    /*
11025 	     * The XML REC instructs us to stop parsing right here
11026 	     */
11027 	    return(-1);
11028 	}
11029 	SKIP_BLANKS;
11030     } else {
11031 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11032     }
11033     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11034         ctxt->sax->startDocument(ctxt->userData);
11035     if (ctxt->instate == XML_PARSER_EOF)
11036 	return(-1);
11037 
11038     /*
11039      * Doing validity checking on chunk doesn't make sense
11040      */
11041     ctxt->instate = XML_PARSER_CONTENT;
11042     ctxt->validate = 0;
11043     ctxt->loadsubset = 0;
11044     ctxt->depth = 0;
11045 
11046     xmlParseContent(ctxt);
11047     if (ctxt->instate == XML_PARSER_EOF)
11048 	return(-1);
11049 
11050     if ((RAW == '<') && (NXT(1) == '/')) {
11051 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11052     } else if (RAW != 0) {
11053 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11054     }
11055 
11056     /*
11057      * SAX: end of the document processing.
11058      */
11059     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11060         ctxt->sax->endDocument(ctxt->userData);
11061 
11062     if (! ctxt->wellFormed) return(-1);
11063     return(0);
11064 }
11065 
11066 #ifdef LIBXML_PUSH_ENABLED
11067 /************************************************************************
11068  *									*
11069  *		Progressive parsing interfaces				*
11070  *									*
11071  ************************************************************************/
11072 
11073 /**
11074  * xmlParseLookupSequence:
11075  * @ctxt:  an XML parser context
11076  * @first:  the first char to lookup
11077  * @next:  the next char to lookup or zero
11078  * @third:  the next char to lookup or zero
11079  *
11080  * Try to find if a sequence (first, next, third) or  just (first next) or
11081  * (first) is available in the input stream.
11082  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11083  * to avoid rescanning sequences of bytes, it DOES change the state of the
11084  * parser, do not use liberally.
11085  *
11086  * Returns the index to the current parsing point if the full sequence
11087  *      is available, -1 otherwise.
11088  */
11089 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11090 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11091                        xmlChar next, xmlChar third) {
11092     int base, len;
11093     xmlParserInputPtr in;
11094     const xmlChar *buf;
11095 
11096     in = ctxt->input;
11097     if (in == NULL) return(-1);
11098     base = in->cur - in->base;
11099     if (base < 0) return(-1);
11100     if (ctxt->checkIndex > base)
11101         base = ctxt->checkIndex;
11102     if (in->buf == NULL) {
11103 	buf = in->base;
11104 	len = in->length;
11105     } else {
11106 	buf = xmlBufContent(in->buf->buffer);
11107 	len = xmlBufUse(in->buf->buffer);
11108     }
11109     /* take into account the sequence length */
11110     if (third) len -= 2;
11111     else if (next) len --;
11112     for (;base < len;base++) {
11113         if (buf[base] == first) {
11114 	    if (third != 0) {
11115 		if ((buf[base + 1] != next) ||
11116 		    (buf[base + 2] != third)) continue;
11117 	    } else if (next != 0) {
11118 		if (buf[base + 1] != next) continue;
11119 	    }
11120 	    ctxt->checkIndex = 0;
11121 #ifdef DEBUG_PUSH
11122 	    if (next == 0)
11123 		xmlGenericError(xmlGenericErrorContext,
11124 			"PP: lookup '%c' found at %d\n",
11125 			first, base);
11126 	    else if (third == 0)
11127 		xmlGenericError(xmlGenericErrorContext,
11128 			"PP: lookup '%c%c' found at %d\n",
11129 			first, next, base);
11130 	    else
11131 		xmlGenericError(xmlGenericErrorContext,
11132 			"PP: lookup '%c%c%c' found at %d\n",
11133 			first, next, third, base);
11134 #endif
11135 	    return(base - (in->cur - in->base));
11136 	}
11137     }
11138     ctxt->checkIndex = base;
11139 #ifdef DEBUG_PUSH
11140     if (next == 0)
11141 	xmlGenericError(xmlGenericErrorContext,
11142 		"PP: lookup '%c' failed\n", first);
11143     else if (third == 0)
11144 	xmlGenericError(xmlGenericErrorContext,
11145 		"PP: lookup '%c%c' failed\n", first, next);
11146     else
11147 	xmlGenericError(xmlGenericErrorContext,
11148 		"PP: lookup '%c%c%c' failed\n", first, next, third);
11149 #endif
11150     return(-1);
11151 }
11152 
11153 /**
11154  * xmlParseGetLasts:
11155  * @ctxt:  an XML parser context
11156  * @lastlt:  pointer to store the last '<' from the input
11157  * @lastgt:  pointer to store the last '>' from the input
11158  *
11159  * Lookup the last < and > in the current chunk
11160  */
11161 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11162 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11163                  const xmlChar **lastgt) {
11164     const xmlChar *tmp;
11165 
11166     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11167 	xmlGenericError(xmlGenericErrorContext,
11168 		    "Internal error: xmlParseGetLasts\n");
11169 	return;
11170     }
11171     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11172         tmp = ctxt->input->end;
11173 	tmp--;
11174 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11175 	if (tmp < ctxt->input->base) {
11176 	    *lastlt = NULL;
11177 	    *lastgt = NULL;
11178 	} else {
11179 	    *lastlt = tmp;
11180 	    tmp++;
11181 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11182 	        if (*tmp == '\'') {
11183 		    tmp++;
11184 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11185 		    if (tmp < ctxt->input->end) tmp++;
11186 		} else if (*tmp == '"') {
11187 		    tmp++;
11188 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11189 		    if (tmp < ctxt->input->end) tmp++;
11190 		} else
11191 		    tmp++;
11192 	    }
11193 	    if (tmp < ctxt->input->end)
11194 	        *lastgt = tmp;
11195 	    else {
11196 	        tmp = *lastlt;
11197 		tmp--;
11198 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11199 		if (tmp >= ctxt->input->base)
11200 		    *lastgt = tmp;
11201 		else
11202 		    *lastgt = NULL;
11203 	    }
11204 	}
11205     } else {
11206         *lastlt = NULL;
11207 	*lastgt = NULL;
11208     }
11209 }
11210 /**
11211  * xmlCheckCdataPush:
11212  * @cur: pointer to the bock of characters
11213  * @len: length of the block in bytes
11214  *
11215  * Check that the block of characters is okay as SCdata content [20]
11216  *
11217  * Returns the number of bytes to pass if okay, a negative index where an
11218  *         UTF-8 error occured otherwise
11219  */
11220 static int
xmlCheckCdataPush(const xmlChar * utf,int len)11221 xmlCheckCdataPush(const xmlChar *utf, int len) {
11222     int ix;
11223     unsigned char c;
11224     int codepoint;
11225 
11226     if ((utf == NULL) || (len <= 0))
11227         return(0);
11228 
11229     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11230         c = utf[ix];
11231         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11232 	    if (c >= 0x20)
11233 		ix++;
11234 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11235 	        ix++;
11236 	    else
11237 	        return(-ix);
11238 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11239 	    if (ix + 2 > len) return(-ix);
11240 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11241 	        return(-ix);
11242 	    codepoint = (utf[ix] & 0x1f) << 6;
11243 	    codepoint |= utf[ix+1] & 0x3f;
11244 	    if (!xmlIsCharQ(codepoint))
11245 	        return(-ix);
11246 	    ix += 2;
11247 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11248 	    if (ix + 3 > len) return(-ix);
11249 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11250 	        ((utf[ix+2] & 0xc0) != 0x80))
11251 		    return(-ix);
11252 	    codepoint = (utf[ix] & 0xf) << 12;
11253 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11254 	    codepoint |= utf[ix+2] & 0x3f;
11255 	    if (!xmlIsCharQ(codepoint))
11256 	        return(-ix);
11257 	    ix += 3;
11258 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11259 	    if (ix + 4 > len) return(-ix);
11260 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11261 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11262 		((utf[ix+3] & 0xc0) != 0x80))
11263 		    return(-ix);
11264 	    codepoint = (utf[ix] & 0x7) << 18;
11265 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11266 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11267 	    codepoint |= utf[ix+3] & 0x3f;
11268 	    if (!xmlIsCharQ(codepoint))
11269 	        return(-ix);
11270 	    ix += 4;
11271 	} else				/* unknown encoding */
11272 	    return(-ix);
11273       }
11274       return(ix);
11275 }
11276 
11277 /**
11278  * xmlParseTryOrFinish:
11279  * @ctxt:  an XML parser context
11280  * @terminate:  last chunk indicator
11281  *
11282  * Try to progress on parsing
11283  *
11284  * Returns zero if no parsing was possible
11285  */
11286 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11287 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11288     int ret = 0;
11289     int avail, tlen;
11290     xmlChar cur, next;
11291     const xmlChar *lastlt, *lastgt;
11292 
11293     if (ctxt->input == NULL)
11294         return(0);
11295 
11296 #ifdef DEBUG_PUSH
11297     switch (ctxt->instate) {
11298 	case XML_PARSER_EOF:
11299 	    xmlGenericError(xmlGenericErrorContext,
11300 		    "PP: try EOF\n"); break;
11301 	case XML_PARSER_START:
11302 	    xmlGenericError(xmlGenericErrorContext,
11303 		    "PP: try START\n"); break;
11304 	case XML_PARSER_MISC:
11305 	    xmlGenericError(xmlGenericErrorContext,
11306 		    "PP: try MISC\n");break;
11307 	case XML_PARSER_COMMENT:
11308 	    xmlGenericError(xmlGenericErrorContext,
11309 		    "PP: try COMMENT\n");break;
11310 	case XML_PARSER_PROLOG:
11311 	    xmlGenericError(xmlGenericErrorContext,
11312 		    "PP: try PROLOG\n");break;
11313 	case XML_PARSER_START_TAG:
11314 	    xmlGenericError(xmlGenericErrorContext,
11315 		    "PP: try START_TAG\n");break;
11316 	case XML_PARSER_CONTENT:
11317 	    xmlGenericError(xmlGenericErrorContext,
11318 		    "PP: try CONTENT\n");break;
11319 	case XML_PARSER_CDATA_SECTION:
11320 	    xmlGenericError(xmlGenericErrorContext,
11321 		    "PP: try CDATA_SECTION\n");break;
11322 	case XML_PARSER_END_TAG:
11323 	    xmlGenericError(xmlGenericErrorContext,
11324 		    "PP: try END_TAG\n");break;
11325 	case XML_PARSER_ENTITY_DECL:
11326 	    xmlGenericError(xmlGenericErrorContext,
11327 		    "PP: try ENTITY_DECL\n");break;
11328 	case XML_PARSER_ENTITY_VALUE:
11329 	    xmlGenericError(xmlGenericErrorContext,
11330 		    "PP: try ENTITY_VALUE\n");break;
11331 	case XML_PARSER_ATTRIBUTE_VALUE:
11332 	    xmlGenericError(xmlGenericErrorContext,
11333 		    "PP: try ATTRIBUTE_VALUE\n");break;
11334 	case XML_PARSER_DTD:
11335 	    xmlGenericError(xmlGenericErrorContext,
11336 		    "PP: try DTD\n");break;
11337 	case XML_PARSER_EPILOG:
11338 	    xmlGenericError(xmlGenericErrorContext,
11339 		    "PP: try EPILOG\n");break;
11340 	case XML_PARSER_PI:
11341 	    xmlGenericError(xmlGenericErrorContext,
11342 		    "PP: try PI\n");break;
11343         case XML_PARSER_IGNORE:
11344             xmlGenericError(xmlGenericErrorContext,
11345 		    "PP: try IGNORE\n");break;
11346     }
11347 #endif
11348 
11349     if ((ctxt->input != NULL) &&
11350         (ctxt->input->cur - ctxt->input->base > 4096)) {
11351 	xmlSHRINK(ctxt);
11352 	ctxt->checkIndex = 0;
11353     }
11354     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11355 
11356     while (ctxt->instate != XML_PARSER_EOF) {
11357 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11358 	    return(0);
11359 
11360 
11361 	/*
11362 	 * Pop-up of finished entities.
11363 	 */
11364 	while ((RAW == 0) && (ctxt->inputNr > 1))
11365 	    xmlPopInput(ctxt);
11366 
11367 	if (ctxt->input == NULL) break;
11368 	if (ctxt->input->buf == NULL)
11369 	    avail = ctxt->input->length -
11370 	            (ctxt->input->cur - ctxt->input->base);
11371 	else {
11372 	    /*
11373 	     * If we are operating on converted input, try to flush
11374 	     * remaining chars to avoid them stalling in the non-converted
11375 	     * buffer. But do not do this in document start where
11376 	     * encoding="..." may not have been read and we work on a
11377 	     * guessed encoding.
11378 	     */
11379 	    if ((ctxt->instate != XML_PARSER_START) &&
11380 	        (ctxt->input->buf->raw != NULL) &&
11381 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11382                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11383                                                  ctxt->input);
11384 		size_t current = ctxt->input->cur - ctxt->input->base;
11385 
11386 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11387                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11388                                       base, current);
11389 	    }
11390 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11391 		    (ctxt->input->cur - ctxt->input->base);
11392 	}
11393         if (avail < 1)
11394 	    goto done;
11395         switch (ctxt->instate) {
11396             case XML_PARSER_EOF:
11397 	        /*
11398 		 * Document parsing is done !
11399 		 */
11400 	        goto done;
11401             case XML_PARSER_START:
11402 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11403 		    xmlChar start[4];
11404 		    xmlCharEncoding enc;
11405 
11406 		    /*
11407 		     * Very first chars read from the document flow.
11408 		     */
11409 		    if (avail < 4)
11410 			goto done;
11411 
11412 		    /*
11413 		     * Get the 4 first bytes and decode the charset
11414 		     * if enc != XML_CHAR_ENCODING_NONE
11415 		     * plug some encoding conversion routines,
11416 		     * else xmlSwitchEncoding will set to (default)
11417 		     * UTF8.
11418 		     */
11419 		    start[0] = RAW;
11420 		    start[1] = NXT(1);
11421 		    start[2] = NXT(2);
11422 		    start[3] = NXT(3);
11423 		    enc = xmlDetectCharEncoding(start, 4);
11424 		    xmlSwitchEncoding(ctxt, enc);
11425 		    break;
11426 		}
11427 
11428 		if (avail < 2)
11429 		    goto done;
11430 		cur = ctxt->input->cur[0];
11431 		next = ctxt->input->cur[1];
11432 		if (cur == 0) {
11433 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11434 			ctxt->sax->setDocumentLocator(ctxt->userData,
11435 						      &xmlDefaultSAXLocator);
11436 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11437 		    xmlHaltParser(ctxt);
11438 #ifdef DEBUG_PUSH
11439 		    xmlGenericError(xmlGenericErrorContext,
11440 			    "PP: entering EOF\n");
11441 #endif
11442 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11443 			ctxt->sax->endDocument(ctxt->userData);
11444 		    goto done;
11445 		}
11446 	        if ((cur == '<') && (next == '?')) {
11447 		    /* PI or XML decl */
11448 		    if (avail < 5) return(ret);
11449 		    if ((!terminate) &&
11450 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11451 			return(ret);
11452 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11453 			ctxt->sax->setDocumentLocator(ctxt->userData,
11454 						      &xmlDefaultSAXLocator);
11455 		    if ((ctxt->input->cur[2] == 'x') &&
11456 			(ctxt->input->cur[3] == 'm') &&
11457 			(ctxt->input->cur[4] == 'l') &&
11458 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11459 			ret += 5;
11460 #ifdef DEBUG_PUSH
11461 			xmlGenericError(xmlGenericErrorContext,
11462 				"PP: Parsing XML Decl\n");
11463 #endif
11464 			xmlParseXMLDecl(ctxt);
11465 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11466 			    /*
11467 			     * The XML REC instructs us to stop parsing right
11468 			     * here
11469 			     */
11470 			    xmlHaltParser(ctxt);
11471 			    return(0);
11472 			}
11473 			ctxt->standalone = ctxt->input->standalone;
11474 			if ((ctxt->encoding == NULL) &&
11475 			    (ctxt->input->encoding != NULL))
11476 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11477 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11478 			    (!ctxt->disableSAX))
11479 			    ctxt->sax->startDocument(ctxt->userData);
11480 			ctxt->instate = XML_PARSER_MISC;
11481 #ifdef DEBUG_PUSH
11482 			xmlGenericError(xmlGenericErrorContext,
11483 				"PP: entering MISC\n");
11484 #endif
11485 		    } else {
11486 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11487 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11488 			    (!ctxt->disableSAX))
11489 			    ctxt->sax->startDocument(ctxt->userData);
11490 			ctxt->instate = XML_PARSER_MISC;
11491 #ifdef DEBUG_PUSH
11492 			xmlGenericError(xmlGenericErrorContext,
11493 				"PP: entering MISC\n");
11494 #endif
11495 		    }
11496 		} else {
11497 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11498 			ctxt->sax->setDocumentLocator(ctxt->userData,
11499 						      &xmlDefaultSAXLocator);
11500 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11501 		    if (ctxt->version == NULL) {
11502 		        xmlErrMemory(ctxt, NULL);
11503 			break;
11504 		    }
11505 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11506 		        (!ctxt->disableSAX))
11507 			ctxt->sax->startDocument(ctxt->userData);
11508 		    ctxt->instate = XML_PARSER_MISC;
11509 #ifdef DEBUG_PUSH
11510 		    xmlGenericError(xmlGenericErrorContext,
11511 			    "PP: entering MISC\n");
11512 #endif
11513 		}
11514 		break;
11515             case XML_PARSER_START_TAG: {
11516 	        const xmlChar *name;
11517 		const xmlChar *prefix = NULL;
11518 		const xmlChar *URI = NULL;
11519 		int nsNr = ctxt->nsNr;
11520 
11521 		if ((avail < 2) && (ctxt->inputNr == 1))
11522 		    goto done;
11523 		cur = ctxt->input->cur[0];
11524 	        if (cur != '<') {
11525 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11526 		    xmlHaltParser(ctxt);
11527 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11528 			ctxt->sax->endDocument(ctxt->userData);
11529 		    goto done;
11530 		}
11531 		if (!terminate) {
11532 		    if (ctxt->progressive) {
11533 		        /* > can be found unescaped in attribute values */
11534 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11535 			    goto done;
11536 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11537 			goto done;
11538 		    }
11539 		}
11540 		if (ctxt->spaceNr == 0)
11541 		    spacePush(ctxt, -1);
11542 		else if (*ctxt->space == -2)
11543 		    spacePush(ctxt, -1);
11544 		else
11545 		    spacePush(ctxt, *ctxt->space);
11546 #ifdef LIBXML_SAX1_ENABLED
11547 		if (ctxt->sax2)
11548 #endif /* LIBXML_SAX1_ENABLED */
11549 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11550 #ifdef LIBXML_SAX1_ENABLED
11551 		else
11552 		    name = xmlParseStartTag(ctxt);
11553 #endif /* LIBXML_SAX1_ENABLED */
11554 		if (ctxt->instate == XML_PARSER_EOF)
11555 		    goto done;
11556 		if (name == NULL) {
11557 		    spacePop(ctxt);
11558 		    xmlHaltParser(ctxt);
11559 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11560 			ctxt->sax->endDocument(ctxt->userData);
11561 		    goto done;
11562 		}
11563 #ifdef LIBXML_VALID_ENABLED
11564 		/*
11565 		 * [ VC: Root Element Type ]
11566 		 * The Name in the document type declaration must match
11567 		 * the element type of the root element.
11568 		 */
11569 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11570 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11571 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11572 #endif /* LIBXML_VALID_ENABLED */
11573 
11574 		/*
11575 		 * Check for an Empty Element.
11576 		 */
11577 		if ((RAW == '/') && (NXT(1) == '>')) {
11578 		    SKIP(2);
11579 
11580 		    if (ctxt->sax2) {
11581 			if ((ctxt->sax != NULL) &&
11582 			    (ctxt->sax->endElementNs != NULL) &&
11583 			    (!ctxt->disableSAX))
11584 			    ctxt->sax->endElementNs(ctxt->userData, name,
11585 			                            prefix, URI);
11586 			if (ctxt->nsNr - nsNr > 0)
11587 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11588 #ifdef LIBXML_SAX1_ENABLED
11589 		    } else {
11590 			if ((ctxt->sax != NULL) &&
11591 			    (ctxt->sax->endElement != NULL) &&
11592 			    (!ctxt->disableSAX))
11593 			    ctxt->sax->endElement(ctxt->userData, name);
11594 #endif /* LIBXML_SAX1_ENABLED */
11595 		    }
11596 		    if (ctxt->instate == XML_PARSER_EOF)
11597 			goto done;
11598 		    spacePop(ctxt);
11599 		    if (ctxt->nameNr == 0) {
11600 			ctxt->instate = XML_PARSER_EPILOG;
11601 		    } else {
11602 			ctxt->instate = XML_PARSER_CONTENT;
11603 		    }
11604                     ctxt->progressive = 1;
11605 		    break;
11606 		}
11607 		if (RAW == '>') {
11608 		    NEXT;
11609 		} else {
11610 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11611 					 "Couldn't find end of Start Tag %s\n",
11612 					 name);
11613 		    nodePop(ctxt);
11614 		    spacePop(ctxt);
11615 		}
11616 		if (ctxt->sax2)
11617 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11618 #ifdef LIBXML_SAX1_ENABLED
11619 		else
11620 		    namePush(ctxt, name);
11621 #endif /* LIBXML_SAX1_ENABLED */
11622 
11623 		ctxt->instate = XML_PARSER_CONTENT;
11624                 ctxt->progressive = 1;
11625                 break;
11626 	    }
11627             case XML_PARSER_CONTENT: {
11628 		const xmlChar *test;
11629 		unsigned int cons;
11630 		if ((avail < 2) && (ctxt->inputNr == 1))
11631 		    goto done;
11632 		cur = ctxt->input->cur[0];
11633 		next = ctxt->input->cur[1];
11634 
11635 		test = CUR_PTR;
11636 	        cons = ctxt->input->consumed;
11637 		if ((cur == '<') && (next == '/')) {
11638 		    ctxt->instate = XML_PARSER_END_TAG;
11639 		    break;
11640 	        } else if ((cur == '<') && (next == '?')) {
11641 		    if ((!terminate) &&
11642 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11643                         ctxt->progressive = XML_PARSER_PI;
11644 			goto done;
11645                     }
11646 		    xmlParsePI(ctxt);
11647 		    ctxt->instate = XML_PARSER_CONTENT;
11648                     ctxt->progressive = 1;
11649 		} else if ((cur == '<') && (next != '!')) {
11650 		    ctxt->instate = XML_PARSER_START_TAG;
11651 		    break;
11652 		} else if ((cur == '<') && (next == '!') &&
11653 		           (ctxt->input->cur[2] == '-') &&
11654 			   (ctxt->input->cur[3] == '-')) {
11655 		    int term;
11656 
11657 	            if (avail < 4)
11658 		        goto done;
11659 		    ctxt->input->cur += 4;
11660 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11661 		    ctxt->input->cur -= 4;
11662 		    if ((!terminate) && (term < 0)) {
11663                         ctxt->progressive = XML_PARSER_COMMENT;
11664 			goto done;
11665                     }
11666 		    xmlParseComment(ctxt);
11667 		    ctxt->instate = XML_PARSER_CONTENT;
11668                     ctxt->progressive = 1;
11669 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11670 		    (ctxt->input->cur[2] == '[') &&
11671 		    (ctxt->input->cur[3] == 'C') &&
11672 		    (ctxt->input->cur[4] == 'D') &&
11673 		    (ctxt->input->cur[5] == 'A') &&
11674 		    (ctxt->input->cur[6] == 'T') &&
11675 		    (ctxt->input->cur[7] == 'A') &&
11676 		    (ctxt->input->cur[8] == '[')) {
11677 		    SKIP(9);
11678 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11679 		    break;
11680 		} else if ((cur == '<') && (next == '!') &&
11681 		           (avail < 9)) {
11682 		    goto done;
11683 		} else if (cur == '&') {
11684 		    if ((!terminate) &&
11685 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11686 			goto done;
11687 		    xmlParseReference(ctxt);
11688 		} else {
11689 		    /* TODO Avoid the extra copy, handle directly !!! */
11690 		    /*
11691 		     * Goal of the following test is:
11692 		     *  - minimize calls to the SAX 'character' callback
11693 		     *    when they are mergeable
11694 		     *  - handle an problem for isBlank when we only parse
11695 		     *    a sequence of blank chars and the next one is
11696 		     *    not available to check against '<' presence.
11697 		     *  - tries to homogenize the differences in SAX
11698 		     *    callbacks between the push and pull versions
11699 		     *    of the parser.
11700 		     */
11701 		    if ((ctxt->inputNr == 1) &&
11702 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11703 			if (!terminate) {
11704 			    if (ctxt->progressive) {
11705 				if ((lastlt == NULL) ||
11706 				    (ctxt->input->cur > lastlt))
11707 				    goto done;
11708 			    } else if (xmlParseLookupSequence(ctxt,
11709 			                                      '<', 0, 0) < 0) {
11710 				goto done;
11711 			    }
11712 			}
11713                     }
11714 		    ctxt->checkIndex = 0;
11715 		    xmlParseCharData(ctxt, 0);
11716 		}
11717 		/*
11718 		 * Pop-up of finished entities.
11719 		 */
11720 		while ((RAW == 0) && (ctxt->inputNr > 1))
11721 		    xmlPopInput(ctxt);
11722 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11723 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11724 		                "detected an error in element content\n");
11725 		    xmlHaltParser(ctxt);
11726 		    break;
11727 		}
11728 		break;
11729 	    }
11730             case XML_PARSER_END_TAG:
11731 		if (avail < 2)
11732 		    goto done;
11733 		if (!terminate) {
11734 		    if (ctxt->progressive) {
11735 		        /* > can be found unescaped in attribute values */
11736 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11737 			    goto done;
11738 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11739 			goto done;
11740 		    }
11741 		}
11742 		if (ctxt->sax2) {
11743 		    xmlParseEndTag2(ctxt,
11744 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11745 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11746 		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11747 		    nameNsPop(ctxt);
11748 		}
11749 #ifdef LIBXML_SAX1_ENABLED
11750 		  else
11751 		    xmlParseEndTag1(ctxt, 0);
11752 #endif /* LIBXML_SAX1_ENABLED */
11753 		if (ctxt->instate == XML_PARSER_EOF) {
11754 		    /* Nothing */
11755 		} else if (ctxt->nameNr == 0) {
11756 		    ctxt->instate = XML_PARSER_EPILOG;
11757 		} else {
11758 		    ctxt->instate = XML_PARSER_CONTENT;
11759 		}
11760 		break;
11761             case XML_PARSER_CDATA_SECTION: {
11762 	        /*
11763 		 * The Push mode need to have the SAX callback for
11764 		 * cdataBlock merge back contiguous callbacks.
11765 		 */
11766 		int base;
11767 
11768 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11769 		if (base < 0) {
11770 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11771 		        int tmp;
11772 
11773 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11774 			                        XML_PARSER_BIG_BUFFER_SIZE);
11775 			if (tmp < 0) {
11776 			    tmp = -tmp;
11777 			    ctxt->input->cur += tmp;
11778 			    goto encoding_error;
11779 			}
11780 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11781 			    if (ctxt->sax->cdataBlock != NULL)
11782 				ctxt->sax->cdataBlock(ctxt->userData,
11783 				                      ctxt->input->cur, tmp);
11784 			    else if (ctxt->sax->characters != NULL)
11785 				ctxt->sax->characters(ctxt->userData,
11786 				                      ctxt->input->cur, tmp);
11787 			}
11788 			if (ctxt->instate == XML_PARSER_EOF)
11789 			    goto done;
11790 			SKIPL(tmp);
11791 			ctxt->checkIndex = 0;
11792 		    }
11793 		    goto done;
11794 		} else {
11795 		    int tmp;
11796 
11797 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11798 		    if ((tmp < 0) || (tmp != base)) {
11799 			tmp = -tmp;
11800 			ctxt->input->cur += tmp;
11801 			goto encoding_error;
11802 		    }
11803 		    if ((ctxt->sax != NULL) && (base == 0) &&
11804 		        (ctxt->sax->cdataBlock != NULL) &&
11805 		        (!ctxt->disableSAX)) {
11806 			/*
11807 			 * Special case to provide identical behaviour
11808 			 * between pull and push parsers on enpty CDATA
11809 			 * sections
11810 			 */
11811 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11812 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11813 			               "<![CDATA[", 9)))
11814 			     ctxt->sax->cdataBlock(ctxt->userData,
11815 			                           BAD_CAST "", 0);
11816 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11817 			(!ctxt->disableSAX)) {
11818 			if (ctxt->sax->cdataBlock != NULL)
11819 			    ctxt->sax->cdataBlock(ctxt->userData,
11820 						  ctxt->input->cur, base);
11821 			else if (ctxt->sax->characters != NULL)
11822 			    ctxt->sax->characters(ctxt->userData,
11823 						  ctxt->input->cur, base);
11824 		    }
11825 		    if (ctxt->instate == XML_PARSER_EOF)
11826 			goto done;
11827 		    SKIPL(base + 3);
11828 		    ctxt->checkIndex = 0;
11829 		    ctxt->instate = XML_PARSER_CONTENT;
11830 #ifdef DEBUG_PUSH
11831 		    xmlGenericError(xmlGenericErrorContext,
11832 			    "PP: entering CONTENT\n");
11833 #endif
11834 		}
11835 		break;
11836 	    }
11837             case XML_PARSER_MISC:
11838 		SKIP_BLANKS;
11839 		if (ctxt->input->buf == NULL)
11840 		    avail = ctxt->input->length -
11841 		            (ctxt->input->cur - ctxt->input->base);
11842 		else
11843 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11844 		            (ctxt->input->cur - ctxt->input->base);
11845 		if (avail < 2)
11846 		    goto done;
11847 		cur = ctxt->input->cur[0];
11848 		next = ctxt->input->cur[1];
11849 	        if ((cur == '<') && (next == '?')) {
11850 		    if ((!terminate) &&
11851 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11852                         ctxt->progressive = XML_PARSER_PI;
11853 			goto done;
11854                     }
11855 #ifdef DEBUG_PUSH
11856 		    xmlGenericError(xmlGenericErrorContext,
11857 			    "PP: Parsing PI\n");
11858 #endif
11859 		    xmlParsePI(ctxt);
11860 		    if (ctxt->instate == XML_PARSER_EOF)
11861 			goto done;
11862 		    ctxt->instate = XML_PARSER_MISC;
11863                     ctxt->progressive = 1;
11864 		    ctxt->checkIndex = 0;
11865 		} else if ((cur == '<') && (next == '!') &&
11866 		    (ctxt->input->cur[2] == '-') &&
11867 		    (ctxt->input->cur[3] == '-')) {
11868 		    if ((!terminate) &&
11869 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11870                         ctxt->progressive = XML_PARSER_COMMENT;
11871 			goto done;
11872                     }
11873 #ifdef DEBUG_PUSH
11874 		    xmlGenericError(xmlGenericErrorContext,
11875 			    "PP: Parsing Comment\n");
11876 #endif
11877 		    xmlParseComment(ctxt);
11878 		    if (ctxt->instate == XML_PARSER_EOF)
11879 			goto done;
11880 		    ctxt->instate = XML_PARSER_MISC;
11881                     ctxt->progressive = 1;
11882 		    ctxt->checkIndex = 0;
11883 		} else if ((cur == '<') && (next == '!') &&
11884 		    (ctxt->input->cur[2] == 'D') &&
11885 		    (ctxt->input->cur[3] == 'O') &&
11886 		    (ctxt->input->cur[4] == 'C') &&
11887 		    (ctxt->input->cur[5] == 'T') &&
11888 		    (ctxt->input->cur[6] == 'Y') &&
11889 		    (ctxt->input->cur[7] == 'P') &&
11890 		    (ctxt->input->cur[8] == 'E')) {
11891 		    if ((!terminate) &&
11892 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11893                         ctxt->progressive = XML_PARSER_DTD;
11894 			goto done;
11895                     }
11896 #ifdef DEBUG_PUSH
11897 		    xmlGenericError(xmlGenericErrorContext,
11898 			    "PP: Parsing internal subset\n");
11899 #endif
11900 		    ctxt->inSubset = 1;
11901                     ctxt->progressive = 0;
11902 		    ctxt->checkIndex = 0;
11903 		    xmlParseDocTypeDecl(ctxt);
11904 		    if (ctxt->instate == XML_PARSER_EOF)
11905 			goto done;
11906 		    if (RAW == '[') {
11907 			ctxt->instate = XML_PARSER_DTD;
11908 #ifdef DEBUG_PUSH
11909 			xmlGenericError(xmlGenericErrorContext,
11910 				"PP: entering DTD\n");
11911 #endif
11912 		    } else {
11913 			/*
11914 			 * Create and update the external subset.
11915 			 */
11916 			ctxt->inSubset = 2;
11917 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11918 			    (ctxt->sax->externalSubset != NULL))
11919 			    ctxt->sax->externalSubset(ctxt->userData,
11920 				    ctxt->intSubName, ctxt->extSubSystem,
11921 				    ctxt->extSubURI);
11922 			ctxt->inSubset = 0;
11923 			xmlCleanSpecialAttr(ctxt);
11924 			ctxt->instate = XML_PARSER_PROLOG;
11925 #ifdef DEBUG_PUSH
11926 			xmlGenericError(xmlGenericErrorContext,
11927 				"PP: entering PROLOG\n");
11928 #endif
11929 		    }
11930 		} else if ((cur == '<') && (next == '!') &&
11931 		           (avail < 9)) {
11932 		    goto done;
11933 		} else {
11934 		    ctxt->instate = XML_PARSER_START_TAG;
11935 		    ctxt->progressive = XML_PARSER_START_TAG;
11936 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11937 #ifdef DEBUG_PUSH
11938 		    xmlGenericError(xmlGenericErrorContext,
11939 			    "PP: entering START_TAG\n");
11940 #endif
11941 		}
11942 		break;
11943             case XML_PARSER_PROLOG:
11944 		SKIP_BLANKS;
11945 		if (ctxt->input->buf == NULL)
11946 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11947 		else
11948 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11949                             (ctxt->input->cur - ctxt->input->base);
11950 		if (avail < 2)
11951 		    goto done;
11952 		cur = ctxt->input->cur[0];
11953 		next = ctxt->input->cur[1];
11954 	        if ((cur == '<') && (next == '?')) {
11955 		    if ((!terminate) &&
11956 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11957                         ctxt->progressive = XML_PARSER_PI;
11958 			goto done;
11959                     }
11960 #ifdef DEBUG_PUSH
11961 		    xmlGenericError(xmlGenericErrorContext,
11962 			    "PP: Parsing PI\n");
11963 #endif
11964 		    xmlParsePI(ctxt);
11965 		    if (ctxt->instate == XML_PARSER_EOF)
11966 			goto done;
11967 		    ctxt->instate = XML_PARSER_PROLOG;
11968                     ctxt->progressive = 1;
11969 		} else if ((cur == '<') && (next == '!') &&
11970 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11971 		    if ((!terminate) &&
11972 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11973                         ctxt->progressive = XML_PARSER_COMMENT;
11974 			goto done;
11975                     }
11976 #ifdef DEBUG_PUSH
11977 		    xmlGenericError(xmlGenericErrorContext,
11978 			    "PP: Parsing Comment\n");
11979 #endif
11980 		    xmlParseComment(ctxt);
11981 		    if (ctxt->instate == XML_PARSER_EOF)
11982 			goto done;
11983 		    ctxt->instate = XML_PARSER_PROLOG;
11984                     ctxt->progressive = 1;
11985 		} else if ((cur == '<') && (next == '!') &&
11986 		           (avail < 4)) {
11987 		    goto done;
11988 		} else {
11989 		    ctxt->instate = XML_PARSER_START_TAG;
11990 		    if (ctxt->progressive == 0)
11991 			ctxt->progressive = XML_PARSER_START_TAG;
11992 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11993 #ifdef DEBUG_PUSH
11994 		    xmlGenericError(xmlGenericErrorContext,
11995 			    "PP: entering START_TAG\n");
11996 #endif
11997 		}
11998 		break;
11999             case XML_PARSER_EPILOG:
12000 		SKIP_BLANKS;
12001 		if (ctxt->input->buf == NULL)
12002 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12003 		else
12004 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
12005                             (ctxt->input->cur - ctxt->input->base);
12006 		if (avail < 2)
12007 		    goto done;
12008 		cur = ctxt->input->cur[0];
12009 		next = ctxt->input->cur[1];
12010 	        if ((cur == '<') && (next == '?')) {
12011 		    if ((!terminate) &&
12012 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12013                         ctxt->progressive = XML_PARSER_PI;
12014 			goto done;
12015                     }
12016 #ifdef DEBUG_PUSH
12017 		    xmlGenericError(xmlGenericErrorContext,
12018 			    "PP: Parsing PI\n");
12019 #endif
12020 		    xmlParsePI(ctxt);
12021 		    if (ctxt->instate == XML_PARSER_EOF)
12022 			goto done;
12023 		    ctxt->instate = XML_PARSER_EPILOG;
12024                     ctxt->progressive = 1;
12025 		} else if ((cur == '<') && (next == '!') &&
12026 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12027 		    if ((!terminate) &&
12028 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12029                         ctxt->progressive = XML_PARSER_COMMENT;
12030 			goto done;
12031                     }
12032 #ifdef DEBUG_PUSH
12033 		    xmlGenericError(xmlGenericErrorContext,
12034 			    "PP: Parsing Comment\n");
12035 #endif
12036 		    xmlParseComment(ctxt);
12037 		    if (ctxt->instate == XML_PARSER_EOF)
12038 			goto done;
12039 		    ctxt->instate = XML_PARSER_EPILOG;
12040                     ctxt->progressive = 1;
12041 		} else if ((cur == '<') && (next == '!') &&
12042 		           (avail < 4)) {
12043 		    goto done;
12044 		} else {
12045 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12046 		    xmlHaltParser(ctxt);
12047 #ifdef DEBUG_PUSH
12048 		    xmlGenericError(xmlGenericErrorContext,
12049 			    "PP: entering EOF\n");
12050 #endif
12051 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12052 			ctxt->sax->endDocument(ctxt->userData);
12053 		    goto done;
12054 		}
12055 		break;
12056             case XML_PARSER_DTD: {
12057 	        /*
12058 		 * Sorry but progressive parsing of the internal subset
12059 		 * is not expected to be supported. We first check that
12060 		 * the full content of the internal subset is available and
12061 		 * the parsing is launched only at that point.
12062 		 * Internal subset ends up with "']' S? '>'" in an unescaped
12063 		 * section and not in a ']]>' sequence which are conditional
12064 		 * sections (whoever argued to keep that crap in XML deserve
12065 		 * a place in hell !).
12066 		 */
12067 		int base, i;
12068 		xmlChar *buf;
12069 	        xmlChar quote = 0;
12070                 size_t use;
12071 
12072 		base = ctxt->input->cur - ctxt->input->base;
12073 		if (base < 0) return(0);
12074 		if (ctxt->checkIndex > base)
12075 		    base = ctxt->checkIndex;
12076 		buf = xmlBufContent(ctxt->input->buf->buffer);
12077                 use = xmlBufUse(ctxt->input->buf->buffer);
12078 		for (;(unsigned int) base < use; base++) {
12079 		    if (quote != 0) {
12080 		        if (buf[base] == quote)
12081 			    quote = 0;
12082 			continue;
12083 		    }
12084 		    if ((quote == 0) && (buf[base] == '<')) {
12085 		        int found  = 0;
12086 			/* special handling of comments */
12087 		        if (((unsigned int) base + 4 < use) &&
12088 			    (buf[base + 1] == '!') &&
12089 			    (buf[base + 2] == '-') &&
12090 			    (buf[base + 3] == '-')) {
12091 			    for (;(unsigned int) base + 3 < use; base++) {
12092 				if ((buf[base] == '-') &&
12093 				    (buf[base + 1] == '-') &&
12094 				    (buf[base + 2] == '>')) {
12095 				    found = 1;
12096 				    base += 2;
12097 				    break;
12098 				}
12099 		            }
12100 			    if (!found) {
12101 #if 0
12102 			        fprintf(stderr, "unfinished comment\n");
12103 #endif
12104 			        break; /* for */
12105 		            }
12106 		            continue;
12107 			}
12108 		    }
12109 		    if (buf[base] == '"') {
12110 		        quote = '"';
12111 			continue;
12112 		    }
12113 		    if (buf[base] == '\'') {
12114 		        quote = '\'';
12115 			continue;
12116 		    }
12117 		    if (buf[base] == ']') {
12118 #if 0
12119 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
12120 			        buf[base + 1], buf[base + 2], buf[base + 3]);
12121 #endif
12122 		        if ((unsigned int) base +1 >= use)
12123 			    break;
12124 			if (buf[base + 1] == ']') {
12125 			    /* conditional crap, skip both ']' ! */
12126 			    base++;
12127 			    continue;
12128 			}
12129 		        for (i = 1; (unsigned int) base + i < use; i++) {
12130 			    if (buf[base + i] == '>') {
12131 #if 0
12132 			        fprintf(stderr, "found\n");
12133 #endif
12134 			        goto found_end_int_subset;
12135 			    }
12136 			    if (!IS_BLANK_CH(buf[base + i])) {
12137 #if 0
12138 			        fprintf(stderr, "not found\n");
12139 #endif
12140 			        goto not_end_of_int_subset;
12141 			    }
12142 			}
12143 #if 0
12144 			fprintf(stderr, "end of stream\n");
12145 #endif
12146 		        break;
12147 
12148 		    }
12149 not_end_of_int_subset:
12150                     continue; /* for */
12151 		}
12152 		/*
12153 		 * We didn't found the end of the Internal subset
12154 		 */
12155                 if (quote == 0)
12156                     ctxt->checkIndex = base;
12157                 else
12158                     ctxt->checkIndex = 0;
12159 #ifdef DEBUG_PUSH
12160 		if (next == 0)
12161 		    xmlGenericError(xmlGenericErrorContext,
12162 			    "PP: lookup of int subset end filed\n");
12163 #endif
12164 	        goto done;
12165 
12166 found_end_int_subset:
12167                 ctxt->checkIndex = 0;
12168 		xmlParseInternalSubset(ctxt);
12169 		if (ctxt->instate == XML_PARSER_EOF)
12170 		    goto done;
12171 		ctxt->inSubset = 2;
12172 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12173 		    (ctxt->sax->externalSubset != NULL))
12174 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12175 			    ctxt->extSubSystem, ctxt->extSubURI);
12176 		ctxt->inSubset = 0;
12177 		xmlCleanSpecialAttr(ctxt);
12178 		if (ctxt->instate == XML_PARSER_EOF)
12179 		    goto done;
12180 		ctxt->instate = XML_PARSER_PROLOG;
12181 		ctxt->checkIndex = 0;
12182 #ifdef DEBUG_PUSH
12183 		xmlGenericError(xmlGenericErrorContext,
12184 			"PP: entering PROLOG\n");
12185 #endif
12186                 break;
12187 	    }
12188             case XML_PARSER_COMMENT:
12189 		xmlGenericError(xmlGenericErrorContext,
12190 			"PP: internal error, state == COMMENT\n");
12191 		ctxt->instate = XML_PARSER_CONTENT;
12192 #ifdef DEBUG_PUSH
12193 		xmlGenericError(xmlGenericErrorContext,
12194 			"PP: entering CONTENT\n");
12195 #endif
12196 		break;
12197             case XML_PARSER_IGNORE:
12198 		xmlGenericError(xmlGenericErrorContext,
12199 			"PP: internal error, state == IGNORE");
12200 	        ctxt->instate = XML_PARSER_DTD;
12201 #ifdef DEBUG_PUSH
12202 		xmlGenericError(xmlGenericErrorContext,
12203 			"PP: entering DTD\n");
12204 #endif
12205 	        break;
12206             case XML_PARSER_PI:
12207 		xmlGenericError(xmlGenericErrorContext,
12208 			"PP: internal error, state == PI\n");
12209 		ctxt->instate = XML_PARSER_CONTENT;
12210 #ifdef DEBUG_PUSH
12211 		xmlGenericError(xmlGenericErrorContext,
12212 			"PP: entering CONTENT\n");
12213 #endif
12214 		break;
12215             case XML_PARSER_ENTITY_DECL:
12216 		xmlGenericError(xmlGenericErrorContext,
12217 			"PP: internal error, state == ENTITY_DECL\n");
12218 		ctxt->instate = XML_PARSER_DTD;
12219 #ifdef DEBUG_PUSH
12220 		xmlGenericError(xmlGenericErrorContext,
12221 			"PP: entering DTD\n");
12222 #endif
12223 		break;
12224             case XML_PARSER_ENTITY_VALUE:
12225 		xmlGenericError(xmlGenericErrorContext,
12226 			"PP: internal error, state == ENTITY_VALUE\n");
12227 		ctxt->instate = XML_PARSER_CONTENT;
12228 #ifdef DEBUG_PUSH
12229 		xmlGenericError(xmlGenericErrorContext,
12230 			"PP: entering DTD\n");
12231 #endif
12232 		break;
12233             case XML_PARSER_ATTRIBUTE_VALUE:
12234 		xmlGenericError(xmlGenericErrorContext,
12235 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12236 		ctxt->instate = XML_PARSER_START_TAG;
12237 #ifdef DEBUG_PUSH
12238 		xmlGenericError(xmlGenericErrorContext,
12239 			"PP: entering START_TAG\n");
12240 #endif
12241 		break;
12242             case XML_PARSER_SYSTEM_LITERAL:
12243 		xmlGenericError(xmlGenericErrorContext,
12244 			"PP: internal error, state == SYSTEM_LITERAL\n");
12245 		ctxt->instate = XML_PARSER_START_TAG;
12246 #ifdef DEBUG_PUSH
12247 		xmlGenericError(xmlGenericErrorContext,
12248 			"PP: entering START_TAG\n");
12249 #endif
12250 		break;
12251             case XML_PARSER_PUBLIC_LITERAL:
12252 		xmlGenericError(xmlGenericErrorContext,
12253 			"PP: internal error, state == PUBLIC_LITERAL\n");
12254 		ctxt->instate = XML_PARSER_START_TAG;
12255 #ifdef DEBUG_PUSH
12256 		xmlGenericError(xmlGenericErrorContext,
12257 			"PP: entering START_TAG\n");
12258 #endif
12259 		break;
12260 	}
12261     }
12262 done:
12263 #ifdef DEBUG_PUSH
12264     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12265 #endif
12266     return(ret);
12267 encoding_error:
12268     {
12269         char buffer[150];
12270 
12271 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12272 			ctxt->input->cur[0], ctxt->input->cur[1],
12273 			ctxt->input->cur[2], ctxt->input->cur[3]);
12274 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12275 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12276 		     BAD_CAST buffer, NULL);
12277     }
12278     return(0);
12279 }
12280 
12281 /**
12282  * xmlParseCheckTransition:
12283  * @ctxt:  an XML parser context
12284  * @chunk:  a char array
12285  * @size:  the size in byte of the chunk
12286  *
12287  * Check depending on the current parser state if the chunk given must be
12288  * processed immediately or one need more data to advance on parsing.
12289  *
12290  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12291  */
12292 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12293 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12294     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12295         return(-1);
12296     if (ctxt->instate == XML_PARSER_START_TAG) {
12297         if (memchr(chunk, '>', size) != NULL)
12298             return(1);
12299         return(0);
12300     }
12301     if (ctxt->progressive == XML_PARSER_COMMENT) {
12302         if (memchr(chunk, '>', size) != NULL)
12303             return(1);
12304         return(0);
12305     }
12306     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12307         if (memchr(chunk, '>', size) != NULL)
12308             return(1);
12309         return(0);
12310     }
12311     if (ctxt->progressive == XML_PARSER_PI) {
12312         if (memchr(chunk, '>', size) != NULL)
12313             return(1);
12314         return(0);
12315     }
12316     if (ctxt->instate == XML_PARSER_END_TAG) {
12317         if (memchr(chunk, '>', size) != NULL)
12318             return(1);
12319         return(0);
12320     }
12321     if ((ctxt->progressive == XML_PARSER_DTD) ||
12322         (ctxt->instate == XML_PARSER_DTD)) {
12323         if (memchr(chunk, '>', size) != NULL)
12324             return(1);
12325         return(0);
12326     }
12327     return(1);
12328 }
12329 
12330 /**
12331  * xmlParseChunk:
12332  * @ctxt:  an XML parser context
 * @chunk:  a char array
12334  * @size:  the size in byte of the chunk
12335  * @terminate:  last chunk indicator
12336  *
12337  * Parse a Chunk of memory
12338  *
12339  * Returns zero if no error, the xmlParserErrors otherwise.
12340  */
12341 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12342 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12343               int terminate) {
12344     int end_in_lf = 0;
12345     int remain = 0;
12346     size_t old_avail = 0;
12347     size_t avail = 0;
12348 
12349     if (ctxt == NULL)
12350         return(XML_ERR_INTERNAL_ERROR);
12351     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12352         return(ctxt->errNo);
12353     if (ctxt->instate == XML_PARSER_EOF)
12354         return(-1);
12355     if (ctxt->instate == XML_PARSER_START)
12356         xmlDetectSAX2(ctxt);
12357     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12358         (chunk[size - 1] == '\r')) {
12359 	end_in_lf = 1;
12360 	size--;
12361     }
12362 
12363 xmldecl_done:
12364 
12365     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12366         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12367 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12368 	size_t cur = ctxt->input->cur - ctxt->input->base;
12369 	int res;
12370 
12371         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12372         /*
12373          * Specific handling if we autodetected an encoding, we should not
12374          * push more than the first line ... which depend on the encoding
12375          * And only push the rest once the final encoding was detected
12376          */
12377         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12378             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12379             unsigned int len = 45;
12380 
12381             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12382                                BAD_CAST "UTF-16")) ||
12383                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12384                                BAD_CAST "UTF16")))
12385                 len = 90;
12386             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12387                                     BAD_CAST "UCS-4")) ||
12388                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12389                                     BAD_CAST "UCS4")))
12390                 len = 180;
12391 
12392             if (ctxt->input->buf->rawconsumed < len)
12393                 len -= ctxt->input->buf->rawconsumed;
12394 
12395             /*
12396              * Change size for reading the initial declaration only
12397              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12398              * will blindly copy extra bytes from memory.
12399              */
12400             if ((unsigned int) size > len) {
12401                 remain = size - len;
12402                 size = len;
12403             } else {
12404                 remain = 0;
12405             }
12406         }
12407 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12408 	if (res < 0) {
12409 	    ctxt->errNo = XML_PARSER_EOF;
12410 	    xmlHaltParser(ctxt);
12411 	    return (XML_PARSER_EOF);
12412 	}
12413         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12414 #ifdef DEBUG_PUSH
12415 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12416 #endif
12417 
12418     } else if (ctxt->instate != XML_PARSER_EOF) {
12419 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12420 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12421 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12422 		    (in->raw != NULL)) {
12423 		int nbchars;
12424 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12425 		size_t current = ctxt->input->cur - ctxt->input->base;
12426 
12427 		nbchars = xmlCharEncInput(in, terminate);
12428 		if (nbchars < 0) {
12429 		    /* TODO 2.6.0 */
12430 		    xmlGenericError(xmlGenericErrorContext,
12431 				    "xmlParseChunk: encoder error\n");
12432 		    return(XML_ERR_INVALID_ENCODING);
12433 		}
12434 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12435 	    }
12436 	}
12437     }
12438     if (remain != 0) {
12439         xmlParseTryOrFinish(ctxt, 0);
12440     } else {
12441         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12442             avail = xmlBufUse(ctxt->input->buf->buffer);
12443         /*
12444          * Depending on the current state it may not be such
12445          * a good idea to try parsing if there is nothing in the chunk
12446          * which would be worth doing a parser state transition and we
12447          * need to wait for more data
12448          */
12449         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12450             (old_avail == 0) || (avail == 0) ||
12451             (xmlParseCheckTransition(ctxt,
12452                        (const char *)&ctxt->input->base[old_avail],
12453                                      avail - old_avail)))
12454             xmlParseTryOrFinish(ctxt, terminate);
12455     }
12456     if (ctxt->instate == XML_PARSER_EOF)
12457         return(ctxt->errNo);
12458 
12459     if ((ctxt->input != NULL) &&
12460          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12461          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12462         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12463         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12464         xmlHaltParser(ctxt);
12465     }
12466     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12467         return(ctxt->errNo);
12468 
12469     if (remain != 0) {
12470         chunk += size;
12471         size = remain;
12472         remain = 0;
12473         goto xmldecl_done;
12474     }
12475     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12476         (ctxt->input->buf != NULL)) {
12477 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12478 					 ctxt->input);
12479 	size_t current = ctxt->input->cur - ctxt->input->base;
12480 
12481 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12482 
12483 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12484 			      base, current);
12485     }
12486     if (terminate) {
12487 	/*
12488 	 * Check for termination
12489 	 */
12490 	int cur_avail = 0;
12491 
12492 	if (ctxt->input != NULL) {
12493 	    if (ctxt->input->buf == NULL)
12494 		cur_avail = ctxt->input->length -
12495 			    (ctxt->input->cur - ctxt->input->base);
12496 	    else
12497 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12498 			              (ctxt->input->cur - ctxt->input->base);
12499 	}
12500 
12501 	if ((ctxt->instate != XML_PARSER_EOF) &&
12502 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12503 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12504 	}
12505 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12506 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12507 	}
12508 	if (ctxt->instate != XML_PARSER_EOF) {
12509 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12510 		ctxt->sax->endDocument(ctxt->userData);
12511 	}
12512 	ctxt->instate = XML_PARSER_EOF;
12513     }
12514     if (ctxt->wellFormed == 0)
12515 	return((xmlParserErrors) ctxt->errNo);
12516     else
12517         return(0);
12518 }
12519 
12520 /************************************************************************
12521  *									*
12522  *		I/O front end functions to the parser			*
12523  *									*
12524  ************************************************************************/
12525 
12526 /**
12527  * xmlCreatePushParserCtxt:
12528  * @sax:  a SAX handler
12529  * @user_data:  The user data returned on SAX callbacks
12530  * @chunk:  a pointer to an array of chars
12531  * @size:  number of chars in the array
12532  * @filename:  an optional file name or URI
12533  *
12534  * Create a parser context for using the XML parser in push mode.
12535  * If @buffer and @size are non-NULL, the data is used to detect
12536  * the encoding.  The remaining characters will be parsed so they
12537  * don't need to be fed in again through xmlParseChunk.
12538  * To allow content encoding detection, @size should be >= 4
12539  * The value of @filename is used for fetching external entities
12540  * and error/warning reports.
12541  *
12542  * Returns the new parser context or NULL
12543  */
12544 
12545 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12546 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12547                         const char *chunk, int size, const char *filename) {
12548     xmlParserCtxtPtr ctxt;
12549     xmlParserInputPtr inputStream;
12550     xmlParserInputBufferPtr buf;
12551     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12552 
12553     /*
12554      * plug some encoding conversion routines
12555      */
12556     if ((chunk != NULL) && (size >= 4))
12557 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12558 
12559     buf = xmlAllocParserInputBuffer(enc);
12560     if (buf == NULL) return(NULL);
12561 
12562     ctxt = xmlNewParserCtxt();
12563     if (ctxt == NULL) {
12564         xmlErrMemory(NULL, "creating parser: out of memory\n");
12565 	xmlFreeParserInputBuffer(buf);
12566 	return(NULL);
12567     }
12568     ctxt->dictNames = 1;
12569     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12570     if (ctxt->pushTab == NULL) {
12571         xmlErrMemory(ctxt, NULL);
12572 	xmlFreeParserInputBuffer(buf);
12573 	xmlFreeParserCtxt(ctxt);
12574 	return(NULL);
12575     }
12576     if (sax != NULL) {
12577 #ifdef LIBXML_SAX1_ENABLED
12578 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12579 #endif /* LIBXML_SAX1_ENABLED */
12580 	    xmlFree(ctxt->sax);
12581 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12582 	if (ctxt->sax == NULL) {
12583 	    xmlErrMemory(ctxt, NULL);
12584 	    xmlFreeParserInputBuffer(buf);
12585 	    xmlFreeParserCtxt(ctxt);
12586 	    return(NULL);
12587 	}
12588 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12589 	if (sax->initialized == XML_SAX2_MAGIC)
12590 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12591 	else
12592 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12593 	if (user_data != NULL)
12594 	    ctxt->userData = user_data;
12595     }
12596     if (filename == NULL) {
12597 	ctxt->directory = NULL;
12598     } else {
12599         ctxt->directory = xmlParserGetDirectory(filename);
12600     }
12601 
12602     inputStream = xmlNewInputStream(ctxt);
12603     if (inputStream == NULL) {
12604 	xmlFreeParserCtxt(ctxt);
12605 	xmlFreeParserInputBuffer(buf);
12606 	return(NULL);
12607     }
12608 
12609     if (filename == NULL)
12610 	inputStream->filename = NULL;
12611     else {
12612 	inputStream->filename = (char *)
12613 	    xmlCanonicPath((const xmlChar *) filename);
12614 	if (inputStream->filename == NULL) {
12615 	    xmlFreeParserCtxt(ctxt);
12616 	    xmlFreeParserInputBuffer(buf);
12617 	    return(NULL);
12618 	}
12619     }
12620     inputStream->buf = buf;
12621     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12622     inputPush(ctxt, inputStream);
12623 
12624     /*
12625      * If the caller didn't provide an initial 'chunk' for determining
12626      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12627      * that it can be automatically determined later
12628      */
12629     if ((size == 0) || (chunk == NULL)) {
12630 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12631     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12632 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12633 	size_t cur = ctxt->input->cur - ctxt->input->base;
12634 
12635 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12636 
12637         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12638 #ifdef DEBUG_PUSH
12639 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12640 #endif
12641     }
12642 
12643     if (enc != XML_CHAR_ENCODING_NONE) {
12644         xmlSwitchEncoding(ctxt, enc);
12645     }
12646 
12647     return(ctxt);
12648 }
12649 #endif /* LIBXML_PUSH_ENABLED */
12650 
12651 /**
12652  * xmlHaltParser:
12653  * @ctxt:  an XML parser context
12654  *
12655  * Blocks further parser processing don't override error
12656  * for internal use
12657  */
12658 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12659 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12660     if (ctxt == NULL)
12661         return;
12662     ctxt->instate = XML_PARSER_EOF;
12663     ctxt->disableSAX = 1;
12664     if (ctxt->input != NULL) {
12665         /*
12666 	 * in case there was a specific allocation deallocate before
12667 	 * overriding base
12668 	 */
12669         if (ctxt->input->free != NULL) {
12670 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12671 	    ctxt->input->free = NULL;
12672 	}
12673 	ctxt->input->cur = BAD_CAST"";
12674 	ctxt->input->base = ctxt->input->cur;
12675     }
12676 }
12677 
12678 /**
12679  * xmlStopParser:
12680  * @ctxt:  an XML parser context
12681  *
12682  * Blocks further parser processing
12683  */
12684 void
xmlStopParser(xmlParserCtxtPtr ctxt)12685 xmlStopParser(xmlParserCtxtPtr ctxt) {
12686     if (ctxt == NULL)
12687         return;
12688     xmlHaltParser(ctxt);
12689     ctxt->errNo = XML_ERR_USER_STOP;
12690 }
12691 
12692 /**
12693  * xmlCreateIOParserCtxt:
12694  * @sax:  a SAX handler
12695  * @user_data:  The user data returned on SAX callbacks
12696  * @ioread:  an I/O read function
12697  * @ioclose:  an I/O close function
12698  * @ioctx:  an I/O handler
12699  * @enc:  the charset encoding if known
12700  *
12701  * Create a parser context for using the XML parser with an existing
12702  * I/O stream
12703  *
12704  * Returns the new parser context or NULL
12705  */
12706 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12707 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12708 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12709 	void *ioctx, xmlCharEncoding enc) {
12710     xmlParserCtxtPtr ctxt;
12711     xmlParserInputPtr inputStream;
12712     xmlParserInputBufferPtr buf;
12713 
12714     if (ioread == NULL) return(NULL);
12715 
12716     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12717     if (buf == NULL) {
12718         if (ioclose != NULL)
12719             ioclose(ioctx);
12720         return (NULL);
12721     }
12722 
12723     ctxt = xmlNewParserCtxt();
12724     if (ctxt == NULL) {
12725 	xmlFreeParserInputBuffer(buf);
12726 	return(NULL);
12727     }
12728     if (sax != NULL) {
12729 #ifdef LIBXML_SAX1_ENABLED
12730 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12731 #endif /* LIBXML_SAX1_ENABLED */
12732 	    xmlFree(ctxt->sax);
12733 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12734 	if (ctxt->sax == NULL) {
12735 	    xmlErrMemory(ctxt, NULL);
12736 	    xmlFreeParserCtxt(ctxt);
12737 	    return(NULL);
12738 	}
12739 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12740 	if (sax->initialized == XML_SAX2_MAGIC)
12741 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12742 	else
12743 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12744 	if (user_data != NULL)
12745 	    ctxt->userData = user_data;
12746     }
12747 
12748     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12749     if (inputStream == NULL) {
12750 	xmlFreeParserCtxt(ctxt);
12751 	return(NULL);
12752     }
12753     inputPush(ctxt, inputStream);
12754 
12755     return(ctxt);
12756 }
12757 
12758 #ifdef LIBXML_VALID_ENABLED
12759 /************************************************************************
12760  *									*
12761  *		Front ends when parsing a DTD				*
12762  *									*
12763  ************************************************************************/
12764 
12765 /**
12766  * xmlIOParseDTD:
12767  * @sax:  the SAX handler block or NULL
12768  * @input:  an Input Buffer
12769  * @enc:  the charset encoding if known
12770  *
12771  * Load and parse a DTD
12772  *
12773  * Returns the resulting xmlDtdPtr or NULL in case of error.
12774  * @input will be freed by the function in any case.
12775  */
12776 
12777 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12778 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12779 	      xmlCharEncoding enc) {
12780     xmlDtdPtr ret = NULL;
12781     xmlParserCtxtPtr ctxt;
12782     xmlParserInputPtr pinput = NULL;
12783     xmlChar start[4];
12784 
12785     if (input == NULL)
12786 	return(NULL);
12787 
12788     ctxt = xmlNewParserCtxt();
12789     if (ctxt == NULL) {
12790         xmlFreeParserInputBuffer(input);
12791 	return(NULL);
12792     }
12793 
12794     /* We are loading a DTD */
12795     ctxt->options |= XML_PARSE_DTDLOAD;
12796 
12797     /*
12798      * Set-up the SAX context
12799      */
12800     if (sax != NULL) {
12801 	if (ctxt->sax != NULL)
12802 	    xmlFree(ctxt->sax);
12803         ctxt->sax = sax;
12804         ctxt->userData = ctxt;
12805     }
12806     xmlDetectSAX2(ctxt);
12807 
12808     /*
12809      * generate a parser input from the I/O handler
12810      */
12811 
12812     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12813     if (pinput == NULL) {
12814         if (sax != NULL) ctxt->sax = NULL;
12815         xmlFreeParserInputBuffer(input);
12816 	xmlFreeParserCtxt(ctxt);
12817 	return(NULL);
12818     }
12819 
12820     /*
12821      * plug some encoding conversion routines here.
12822      */
12823     if (xmlPushInput(ctxt, pinput) < 0) {
12824         if (sax != NULL) ctxt->sax = NULL;
12825 	xmlFreeParserCtxt(ctxt);
12826 	return(NULL);
12827     }
12828     if (enc != XML_CHAR_ENCODING_NONE) {
12829         xmlSwitchEncoding(ctxt, enc);
12830     }
12831 
12832     pinput->filename = NULL;
12833     pinput->line = 1;
12834     pinput->col = 1;
12835     pinput->base = ctxt->input->cur;
12836     pinput->cur = ctxt->input->cur;
12837     pinput->free = NULL;
12838 
12839     /*
12840      * let's parse that entity knowing it's an external subset.
12841      */
12842     ctxt->inSubset = 2;
12843     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12844     if (ctxt->myDoc == NULL) {
12845 	xmlErrMemory(ctxt, "New Doc failed");
12846 	return(NULL);
12847     }
12848     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12849     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12850 	                               BAD_CAST "none", BAD_CAST "none");
12851 
12852     if ((enc == XML_CHAR_ENCODING_NONE) &&
12853         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12854 	/*
12855 	 * Get the 4 first bytes and decode the charset
12856 	 * if enc != XML_CHAR_ENCODING_NONE
12857 	 * plug some encoding conversion routines.
12858 	 */
12859 	start[0] = RAW;
12860 	start[1] = NXT(1);
12861 	start[2] = NXT(2);
12862 	start[3] = NXT(3);
12863 	enc = xmlDetectCharEncoding(start, 4);
12864 	if (enc != XML_CHAR_ENCODING_NONE) {
12865 	    xmlSwitchEncoding(ctxt, enc);
12866 	}
12867     }
12868 
12869     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12870 
12871     if (ctxt->myDoc != NULL) {
12872 	if (ctxt->wellFormed) {
12873 	    ret = ctxt->myDoc->extSubset;
12874 	    ctxt->myDoc->extSubset = NULL;
12875 	    if (ret != NULL) {
12876 		xmlNodePtr tmp;
12877 
12878 		ret->doc = NULL;
12879 		tmp = ret->children;
12880 		while (tmp != NULL) {
12881 		    tmp->doc = NULL;
12882 		    tmp = tmp->next;
12883 		}
12884 	    }
12885 	} else {
12886 	    ret = NULL;
12887 	}
12888         xmlFreeDoc(ctxt->myDoc);
12889         ctxt->myDoc = NULL;
12890     }
12891     if (sax != NULL) ctxt->sax = NULL;
12892     xmlFreeParserCtxt(ctxt);
12893 
12894     return(ret);
12895 }
12896 
12897 /**
12898  * xmlSAXParseDTD:
12899  * @sax:  the SAX handler block
12900  * @ExternalID:  a NAME* containing the External ID of the DTD
12901  * @SystemID:  a NAME* containing the URL to the DTD
12902  *
12903  * Load and parse an external subset.
12904  *
12905  * Returns the resulting xmlDtdPtr or NULL in case of error.
12906  */
12907 
12908 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12909 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12910                           const xmlChar *SystemID) {
12911     xmlDtdPtr ret = NULL;
12912     xmlParserCtxtPtr ctxt;
12913     xmlParserInputPtr input = NULL;
12914     xmlCharEncoding enc;
12915     xmlChar* systemIdCanonic;
12916 
12917     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12918 
12919     ctxt = xmlNewParserCtxt();
12920     if (ctxt == NULL) {
12921 	return(NULL);
12922     }
12923 
12924     /* We are loading a DTD */
12925     ctxt->options |= XML_PARSE_DTDLOAD;
12926 
12927     /*
12928      * Set-up the SAX context
12929      */
12930     if (sax != NULL) {
12931 	if (ctxt->sax != NULL)
12932 	    xmlFree(ctxt->sax);
12933         ctxt->sax = sax;
12934         ctxt->userData = ctxt;
12935     }
12936 
12937     /*
12938      * Canonicalise the system ID
12939      */
12940     systemIdCanonic = xmlCanonicPath(SystemID);
12941     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12942 	xmlFreeParserCtxt(ctxt);
12943 	return(NULL);
12944     }
12945 
12946     /*
12947      * Ask the Entity resolver to load the damn thing
12948      */
12949 
12950     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12951 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12952 	                                 systemIdCanonic);
12953     if (input == NULL) {
12954         if (sax != NULL) ctxt->sax = NULL;
12955 	xmlFreeParserCtxt(ctxt);
12956 	if (systemIdCanonic != NULL)
12957 	    xmlFree(systemIdCanonic);
12958 	return(NULL);
12959     }
12960 
12961     /*
12962      * plug some encoding conversion routines here.
12963      */
12964     if (xmlPushInput(ctxt, input) < 0) {
12965         if (sax != NULL) ctxt->sax = NULL;
12966 	xmlFreeParserCtxt(ctxt);
12967 	if (systemIdCanonic != NULL)
12968 	    xmlFree(systemIdCanonic);
12969 	return(NULL);
12970     }
12971     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12972 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12973 	xmlSwitchEncoding(ctxt, enc);
12974     }
12975 
12976     if (input->filename == NULL)
12977 	input->filename = (char *) systemIdCanonic;
12978     else
12979 	xmlFree(systemIdCanonic);
12980     input->line = 1;
12981     input->col = 1;
12982     input->base = ctxt->input->cur;
12983     input->cur = ctxt->input->cur;
12984     input->free = NULL;
12985 
12986     /*
12987      * let's parse that entity knowing it's an external subset.
12988      */
12989     ctxt->inSubset = 2;
12990     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12991     if (ctxt->myDoc == NULL) {
12992 	xmlErrMemory(ctxt, "New Doc failed");
12993         if (sax != NULL) ctxt->sax = NULL;
12994 	xmlFreeParserCtxt(ctxt);
12995 	return(NULL);
12996     }
12997     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12998     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12999 	                               ExternalID, SystemID);
13000     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13001 
13002     if (ctxt->myDoc != NULL) {
13003 	if (ctxt->wellFormed) {
13004 	    ret = ctxt->myDoc->extSubset;
13005 	    ctxt->myDoc->extSubset = NULL;
13006 	    if (ret != NULL) {
13007 		xmlNodePtr tmp;
13008 
13009 		ret->doc = NULL;
13010 		tmp = ret->children;
13011 		while (tmp != NULL) {
13012 		    tmp->doc = NULL;
13013 		    tmp = tmp->next;
13014 		}
13015 	    }
13016 	} else {
13017 	    ret = NULL;
13018 	}
13019         xmlFreeDoc(ctxt->myDoc);
13020         ctxt->myDoc = NULL;
13021     }
13022     if (sax != NULL) ctxt->sax = NULL;
13023     xmlFreeParserCtxt(ctxt);
13024 
13025     return(ret);
13026 }
13027 
13028 
13029 /**
13030  * xmlParseDTD:
13031  * @ExternalID:  a NAME* containing the External ID of the DTD
13032  * @SystemID:  a NAME* containing the URL to the DTD
13033  *
13034  * Load and parse an external subset.
13035  *
13036  * Returns the resulting xmlDtdPtr or NULL in case of error.
13037  */
13038 
13039 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)13040 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13041     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13042 }
13043 #endif /* LIBXML_VALID_ENABLED */
13044 
13045 /************************************************************************
13046  *									*
13047  *		Front ends when parsing an Entity			*
13048  *									*
13049  ************************************************************************/
13050 
13051 /**
13052  * xmlParseCtxtExternalEntity:
13053  * @ctx:  the existing parsing context
13054  * @URL:  the URL for the entity to load
13055  * @ID:  the System ID for the entity to load
13056  * @lst:  the return value for the set of parsed nodes
13057  *
13058  * Parse an external general entity within an existing parsing context
13059  * An external general parsed entity is well-formed if it matches the
13060  * production labeled extParsedEnt.
13061  *
13062  * [78] extParsedEnt ::= TextDecl? content
13063  *
13064  * Returns 0 if the entity is well formed, -1 in case of args problem and
13065  *    the parser error code otherwise
13066  */
13067 
13068 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13069 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13070 	               const xmlChar *ID, xmlNodePtr *lst) {
13071     xmlParserCtxtPtr ctxt;
13072     xmlDocPtr newDoc;
13073     xmlNodePtr newRoot;
13074     xmlSAXHandlerPtr oldsax = NULL;
13075     int ret = 0;
13076     xmlChar start[4];
13077     xmlCharEncoding enc;
13078 
13079     if (ctx == NULL) return(-1);
13080 
13081     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13082         (ctx->depth > 1024)) {
13083 	return(XML_ERR_ENTITY_LOOP);
13084     }
13085 
13086     if (lst != NULL)
13087         *lst = NULL;
13088     if ((URL == NULL) && (ID == NULL))
13089 	return(-1);
13090     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13091 	return(-1);
13092 
13093     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13094     if (ctxt == NULL) {
13095 	return(-1);
13096     }
13097 
13098     oldsax = ctxt->sax;
13099     ctxt->sax = ctx->sax;
13100     xmlDetectSAX2(ctxt);
13101     newDoc = xmlNewDoc(BAD_CAST "1.0");
13102     if (newDoc == NULL) {
13103 	xmlFreeParserCtxt(ctxt);
13104 	return(-1);
13105     }
13106     newDoc->properties = XML_DOC_INTERNAL;
13107     if (ctx->myDoc->dict) {
13108 	newDoc->dict = ctx->myDoc->dict;
13109 	xmlDictReference(newDoc->dict);
13110     }
13111     if (ctx->myDoc != NULL) {
13112 	newDoc->intSubset = ctx->myDoc->intSubset;
13113 	newDoc->extSubset = ctx->myDoc->extSubset;
13114     }
13115     if (ctx->myDoc->URL != NULL) {
13116 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13117     }
13118     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13119     if (newRoot == NULL) {
13120 	ctxt->sax = oldsax;
13121 	xmlFreeParserCtxt(ctxt);
13122 	newDoc->intSubset = NULL;
13123 	newDoc->extSubset = NULL;
13124         xmlFreeDoc(newDoc);
13125 	return(-1);
13126     }
13127     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13128     nodePush(ctxt, newDoc->children);
13129     if (ctx->myDoc == NULL) {
13130 	ctxt->myDoc = newDoc;
13131     } else {
13132 	ctxt->myDoc = ctx->myDoc;
13133 	newDoc->children->doc = ctx->myDoc;
13134     }
13135 
13136     /*
13137      * Get the 4 first bytes and decode the charset
13138      * if enc != XML_CHAR_ENCODING_NONE
13139      * plug some encoding conversion routines.
13140      */
13141     GROW
13142     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13143 	start[0] = RAW;
13144 	start[1] = NXT(1);
13145 	start[2] = NXT(2);
13146 	start[3] = NXT(3);
13147 	enc = xmlDetectCharEncoding(start, 4);
13148 	if (enc != XML_CHAR_ENCODING_NONE) {
13149 	    xmlSwitchEncoding(ctxt, enc);
13150 	}
13151     }
13152 
13153     /*
13154      * Parse a possible text declaration first
13155      */
13156     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13157 	xmlParseTextDecl(ctxt);
13158 	/*
13159 	 * An XML-1.0 document can't reference an entity not XML-1.0
13160 	 */
13161 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13162 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13163 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13164 	                   "Version mismatch between document and entity\n");
13165 	}
13166     }
13167 
13168     /*
13169      * If the user provided its own SAX callbacks then reuse the
13170      * useData callback field, otherwise the expected setup in a
13171      * DOM builder is to have userData == ctxt
13172      */
13173     if (ctx->userData == ctx)
13174         ctxt->userData = ctxt;
13175     else
13176         ctxt->userData = ctx->userData;
13177 
13178     /*
13179      * Doing validity checking on chunk doesn't make sense
13180      */
13181     ctxt->instate = XML_PARSER_CONTENT;
13182     ctxt->validate = ctx->validate;
13183     ctxt->valid = ctx->valid;
13184     ctxt->loadsubset = ctx->loadsubset;
13185     ctxt->depth = ctx->depth + 1;
13186     ctxt->replaceEntities = ctx->replaceEntities;
13187     if (ctxt->validate) {
13188 	ctxt->vctxt.error = ctx->vctxt.error;
13189 	ctxt->vctxt.warning = ctx->vctxt.warning;
13190     } else {
13191 	ctxt->vctxt.error = NULL;
13192 	ctxt->vctxt.warning = NULL;
13193     }
13194     ctxt->vctxt.nodeTab = NULL;
13195     ctxt->vctxt.nodeNr = 0;
13196     ctxt->vctxt.nodeMax = 0;
13197     ctxt->vctxt.node = NULL;
13198     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13199     ctxt->dict = ctx->dict;
13200     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13201     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13202     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13203     ctxt->dictNames = ctx->dictNames;
13204     ctxt->attsDefault = ctx->attsDefault;
13205     ctxt->attsSpecial = ctx->attsSpecial;
13206     ctxt->linenumbers = ctx->linenumbers;
13207 
13208     xmlParseContent(ctxt);
13209 
13210     ctx->validate = ctxt->validate;
13211     ctx->valid = ctxt->valid;
13212     if ((RAW == '<') && (NXT(1) == '/')) {
13213 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13214     } else if (RAW != 0) {
13215 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13216     }
13217     if (ctxt->node != newDoc->children) {
13218 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13219     }
13220 
13221     if (!ctxt->wellFormed) {
13222         if (ctxt->errNo == 0)
13223 	    ret = 1;
13224 	else
13225 	    ret = ctxt->errNo;
13226     } else {
13227 	if (lst != NULL) {
13228 	    xmlNodePtr cur;
13229 
13230 	    /*
13231 	     * Return the newly created nodeset after unlinking it from
13232 	     * they pseudo parent.
13233 	     */
13234 	    cur = newDoc->children->children;
13235 	    *lst = cur;
13236 	    while (cur != NULL) {
13237 		cur->parent = NULL;
13238 		cur = cur->next;
13239 	    }
13240             newDoc->children->children = NULL;
13241 	}
13242 	ret = 0;
13243     }
13244     ctxt->sax = oldsax;
13245     ctxt->dict = NULL;
13246     ctxt->attsDefault = NULL;
13247     ctxt->attsSpecial = NULL;
13248     xmlFreeParserCtxt(ctxt);
13249     newDoc->intSubset = NULL;
13250     newDoc->extSubset = NULL;
13251     xmlFreeDoc(newDoc);
13252 
13253     return(ret);
13254 }
13255 
13256 /**
13257  * xmlParseExternalEntityPrivate:
13258  * @doc:  the document the chunk pertains to
13259  * @oldctxt:  the previous parser context if available
13260  * @sax:  the SAX handler bloc (possibly NULL)
13261  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13262  * @depth:  Used for loop detection, use 0
13263  * @URL:  the URL for the entity to load
13264  * @ID:  the System ID for the entity to load
13265  * @list:  the return value for the set of parsed nodes
13266  *
13267  * Private version of xmlParseExternalEntity()
13268  *
13269  * Returns 0 if the entity is well formed, -1 in case of args problem and
13270  *    the parser error code otherwise
13271  */
13272 
13273 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13274 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13275 	              xmlSAXHandlerPtr sax,
13276 		      void *user_data, int depth, const xmlChar *URL,
13277 		      const xmlChar *ID, xmlNodePtr *list) {
13278     xmlParserCtxtPtr ctxt;
13279     xmlDocPtr newDoc;
13280     xmlNodePtr newRoot;
13281     xmlSAXHandlerPtr oldsax = NULL;
13282     xmlParserErrors ret = XML_ERR_OK;
13283     xmlChar start[4];
13284     xmlCharEncoding enc;
13285 
13286     if (((depth > 40) &&
13287 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13288 	(depth > 1024)) {
13289 	return(XML_ERR_ENTITY_LOOP);
13290     }
13291 
13292     if (list != NULL)
13293         *list = NULL;
13294     if ((URL == NULL) && (ID == NULL))
13295 	return(XML_ERR_INTERNAL_ERROR);
13296     if (doc == NULL)
13297 	return(XML_ERR_INTERNAL_ERROR);
13298 
13299 
13300     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13301     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13302     ctxt->userData = ctxt;
13303     if (oldctxt != NULL) {
13304 	ctxt->_private = oldctxt->_private;
13305 	ctxt->loadsubset = oldctxt->loadsubset;
13306 	ctxt->validate = oldctxt->validate;
13307 	ctxt->external = oldctxt->external;
13308 	ctxt->record_info = oldctxt->record_info;
13309 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13310 	ctxt->node_seq.length = oldctxt->node_seq.length;
13311 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13312     } else {
13313 	/*
13314 	 * Doing validity checking on chunk without context
13315 	 * doesn't make sense
13316 	 */
13317 	ctxt->_private = NULL;
13318 	ctxt->validate = 0;
13319 	ctxt->external = 2;
13320 	ctxt->loadsubset = 0;
13321     }
13322     if (sax != NULL) {
13323 	oldsax = ctxt->sax;
13324         ctxt->sax = sax;
13325 	if (user_data != NULL)
13326 	    ctxt->userData = user_data;
13327     }
13328     xmlDetectSAX2(ctxt);
13329     newDoc = xmlNewDoc(BAD_CAST "1.0");
13330     if (newDoc == NULL) {
13331 	ctxt->node_seq.maximum = 0;
13332 	ctxt->node_seq.length = 0;
13333 	ctxt->node_seq.buffer = NULL;
13334 	xmlFreeParserCtxt(ctxt);
13335 	return(XML_ERR_INTERNAL_ERROR);
13336     }
13337     newDoc->properties = XML_DOC_INTERNAL;
13338     newDoc->intSubset = doc->intSubset;
13339     newDoc->extSubset = doc->extSubset;
13340     newDoc->dict = doc->dict;
13341     xmlDictReference(newDoc->dict);
13342 
13343     if (doc->URL != NULL) {
13344 	newDoc->URL = xmlStrdup(doc->URL);
13345     }
13346     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13347     if (newRoot == NULL) {
13348 	if (sax != NULL)
13349 	    ctxt->sax = oldsax;
13350 	ctxt->node_seq.maximum = 0;
13351 	ctxt->node_seq.length = 0;
13352 	ctxt->node_seq.buffer = NULL;
13353 	xmlFreeParserCtxt(ctxt);
13354 	newDoc->intSubset = NULL;
13355 	newDoc->extSubset = NULL;
13356         xmlFreeDoc(newDoc);
13357 	return(XML_ERR_INTERNAL_ERROR);
13358     }
13359     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13360     nodePush(ctxt, newDoc->children);
13361     ctxt->myDoc = doc;
13362     newRoot->doc = doc;
13363 
13364     /*
13365      * Get the 4 first bytes and decode the charset
13366      * if enc != XML_CHAR_ENCODING_NONE
13367      * plug some encoding conversion routines.
13368      */
13369     GROW;
13370     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13371 	start[0] = RAW;
13372 	start[1] = NXT(1);
13373 	start[2] = NXT(2);
13374 	start[3] = NXT(3);
13375 	enc = xmlDetectCharEncoding(start, 4);
13376 	if (enc != XML_CHAR_ENCODING_NONE) {
13377 	    xmlSwitchEncoding(ctxt, enc);
13378 	}
13379     }
13380 
13381     /*
13382      * Parse a possible text declaration first
13383      */
13384     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13385 	xmlParseTextDecl(ctxt);
13386     }
13387 
13388     ctxt->instate = XML_PARSER_CONTENT;
13389     ctxt->depth = depth;
13390 
13391     xmlParseContent(ctxt);
13392 
13393     if ((RAW == '<') && (NXT(1) == '/')) {
13394 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13395     } else if (RAW != 0) {
13396 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13397     }
13398     if (ctxt->node != newDoc->children) {
13399 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13400     }
13401 
13402     if (!ctxt->wellFormed) {
13403         if (ctxt->errNo == 0)
13404 	    ret = XML_ERR_INTERNAL_ERROR;
13405 	else
13406 	    ret = (xmlParserErrors)ctxt->errNo;
13407     } else {
13408 	if (list != NULL) {
13409 	    xmlNodePtr cur;
13410 
13411 	    /*
13412 	     * Return the newly created nodeset after unlinking it from
13413 	     * they pseudo parent.
13414 	     */
13415 	    cur = newDoc->children->children;
13416 	    *list = cur;
13417 	    while (cur != NULL) {
13418 		cur->parent = NULL;
13419 		cur = cur->next;
13420 	    }
13421             newDoc->children->children = NULL;
13422 	}
13423 	ret = XML_ERR_OK;
13424     }
13425 
13426     /*
13427      * Record in the parent context the number of entities replacement
13428      * done when parsing that reference.
13429      */
13430     if (oldctxt != NULL)
13431         oldctxt->nbentities += ctxt->nbentities;
13432 
13433     /*
13434      * Also record the size of the entity parsed
13435      */
13436     if (ctxt->input != NULL && oldctxt != NULL) {
13437 	oldctxt->sizeentities += ctxt->input->consumed;
13438 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13439     }
13440     /*
13441      * And record the last error if any
13442      */
13443     if (ctxt->lastError.code != XML_ERR_OK)
13444         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13445 
13446     if (sax != NULL)
13447 	ctxt->sax = oldsax;
13448     if (oldctxt != NULL) {
13449         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13450         oldctxt->node_seq.length = ctxt->node_seq.length;
13451         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13452     }
13453     ctxt->node_seq.maximum = 0;
13454     ctxt->node_seq.length = 0;
13455     ctxt->node_seq.buffer = NULL;
13456     xmlFreeParserCtxt(ctxt);
13457     newDoc->intSubset = NULL;
13458     newDoc->extSubset = NULL;
13459     xmlFreeDoc(newDoc);
13460 
13461     return(ret);
13462 }
13463 
13464 #ifdef LIBXML_SAX1_ENABLED
13465 /**
13466  * xmlParseExternalEntity:
13467  * @doc:  the document the chunk pertains to
13468  * @sax:  the SAX handler bloc (possibly NULL)
13469  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13470  * @depth:  Used for loop detection, use 0
13471  * @URL:  the URL for the entity to load
13472  * @ID:  the System ID for the entity to load
13473  * @lst:  the return value for the set of parsed nodes
13474  *
13475  * Parse an external general entity
13476  * An external general parsed entity is well-formed if it matches the
13477  * production labeled extParsedEnt.
13478  *
13479  * [78] extParsedEnt ::= TextDecl? content
13480  *
13481  * Returns 0 if the entity is well formed, -1 in case of args problem and
13482  *    the parser error code otherwise
13483  */
13484 
13485 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13486 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13487 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13488     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13489 		                       ID, lst));
13490 }
13491 
13492 /**
13493  * xmlParseBalancedChunkMemory:
13494  * @doc:  the document the chunk pertains to
13495  * @sax:  the SAX handler bloc (possibly NULL)
13496  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13497  * @depth:  Used for loop detection, use 0
13498  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13499  * @lst:  the return value for the set of parsed nodes
13500  *
13501  * Parse a well-balanced chunk of an XML document
13502  * called by the parser
13503  * The allowed sequence for the Well Balanced Chunk is the one defined by
13504  * the content production in the XML grammar:
13505  *
13506  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13507  *
13508  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13509  *    the parser error code otherwise
13510  */
13511 
13512 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13513 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13514      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13515     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13516                                                 depth, string, lst, 0 );
13517 }
13518 #endif /* LIBXML_SAX1_ENABLED */
13519 
13520 /**
13521  * xmlParseBalancedChunkMemoryInternal:
13522  * @oldctxt:  the existing parsing context
13523  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13524  * @user_data:  the user data field for the parser context
13525  * @lst:  the return value for the set of parsed nodes
13526  *
13527  *
13528  * Parse a well-balanced chunk of an XML document
13529  * called by the parser
13530  * The allowed sequence for the Well Balanced Chunk is the one defined by
13531  * the content production in the XML grammar:
13532  *
13533  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13534  *
13535  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13536  * error code otherwise
13537  *
13538  * In case recover is set to 1, the nodelist will not be empty even if
13539  * the parsed chunk is not well balanced.
13540  */
13541 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13542 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13543 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13544     xmlParserCtxtPtr ctxt;
13545     xmlDocPtr newDoc = NULL;
13546     xmlNodePtr newRoot;
13547     xmlSAXHandlerPtr oldsax = NULL;
13548     xmlNodePtr content = NULL;
13549     xmlNodePtr last = NULL;
13550     int size;
13551     xmlParserErrors ret = XML_ERR_OK;
13552 #ifdef SAX2
13553     int i;
13554 #endif
13555 
13556     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13557         (oldctxt->depth >  1024)) {
13558 	return(XML_ERR_ENTITY_LOOP);
13559     }
13560 
13561 
13562     if (lst != NULL)
13563         *lst = NULL;
13564     if (string == NULL)
13565         return(XML_ERR_INTERNAL_ERROR);
13566 
13567     size = xmlStrlen(string);
13568 
13569     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13570     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13571     if (user_data != NULL)
13572 	ctxt->userData = user_data;
13573     else
13574 	ctxt->userData = ctxt;
13575     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13576     ctxt->dict = oldctxt->dict;
13577     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13578     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13579     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13580 
13581 #ifdef SAX2
13582     /* propagate namespaces down the entity */
13583     for (i = 0;i < oldctxt->nsNr;i += 2) {
13584         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13585     }
13586 #endif
13587 
13588     oldsax = ctxt->sax;
13589     ctxt->sax = oldctxt->sax;
13590     xmlDetectSAX2(ctxt);
13591     ctxt->replaceEntities = oldctxt->replaceEntities;
13592     ctxt->options = oldctxt->options;
13593 
13594     ctxt->_private = oldctxt->_private;
13595     if (oldctxt->myDoc == NULL) {
13596 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13597 	if (newDoc == NULL) {
13598 	    ctxt->sax = oldsax;
13599 	    ctxt->dict = NULL;
13600 	    xmlFreeParserCtxt(ctxt);
13601 	    return(XML_ERR_INTERNAL_ERROR);
13602 	}
13603 	newDoc->properties = XML_DOC_INTERNAL;
13604 	newDoc->dict = ctxt->dict;
13605 	xmlDictReference(newDoc->dict);
13606 	ctxt->myDoc = newDoc;
13607     } else {
13608 	ctxt->myDoc = oldctxt->myDoc;
13609         content = ctxt->myDoc->children;
13610 	last = ctxt->myDoc->last;
13611     }
13612     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13613     if (newRoot == NULL) {
13614 	ctxt->sax = oldsax;
13615 	ctxt->dict = NULL;
13616 	xmlFreeParserCtxt(ctxt);
13617 	if (newDoc != NULL) {
13618 	    xmlFreeDoc(newDoc);
13619 	}
13620 	return(XML_ERR_INTERNAL_ERROR);
13621     }
13622     ctxt->myDoc->children = NULL;
13623     ctxt->myDoc->last = NULL;
13624     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13625     nodePush(ctxt, ctxt->myDoc->children);
13626     ctxt->instate = XML_PARSER_CONTENT;
13627     ctxt->depth = oldctxt->depth + 1;
13628 
13629     ctxt->validate = 0;
13630     ctxt->loadsubset = oldctxt->loadsubset;
13631     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13632 	/*
13633 	 * ID/IDREF registration will be done in xmlValidateElement below
13634 	 */
13635 	ctxt->loadsubset |= XML_SKIP_IDS;
13636     }
13637     ctxt->dictNames = oldctxt->dictNames;
13638     ctxt->attsDefault = oldctxt->attsDefault;
13639     ctxt->attsSpecial = oldctxt->attsSpecial;
13640 
13641     xmlParseContent(ctxt);
13642     if ((RAW == '<') && (NXT(1) == '/')) {
13643 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13644     } else if (RAW != 0) {
13645 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13646     }
13647     if (ctxt->node != ctxt->myDoc->children) {
13648 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13649     }
13650 
13651     if (!ctxt->wellFormed) {
13652         if (ctxt->errNo == 0)
13653 	    ret = XML_ERR_INTERNAL_ERROR;
13654 	else
13655 	    ret = (xmlParserErrors)ctxt->errNo;
13656     } else {
13657       ret = XML_ERR_OK;
13658     }
13659 
13660     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13661 	xmlNodePtr cur;
13662 
13663 	/*
13664 	 * Return the newly created nodeset after unlinking it from
13665 	 * they pseudo parent.
13666 	 */
13667 	cur = ctxt->myDoc->children->children;
13668 	*lst = cur;
13669 	while (cur != NULL) {
13670 #ifdef LIBXML_VALID_ENABLED
13671 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13672 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13673 		(cur->type == XML_ELEMENT_NODE)) {
13674 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13675 			oldctxt->myDoc, cur);
13676 	    }
13677 #endif /* LIBXML_VALID_ENABLED */
13678 	    cur->parent = NULL;
13679 	    cur = cur->next;
13680 	}
13681 	ctxt->myDoc->children->children = NULL;
13682     }
13683     if (ctxt->myDoc != NULL) {
13684 	xmlFreeNode(ctxt->myDoc->children);
13685         ctxt->myDoc->children = content;
13686         ctxt->myDoc->last = last;
13687     }
13688 
13689     /*
13690      * Record in the parent context the number of entities replacement
13691      * done when parsing that reference.
13692      */
13693     if (oldctxt != NULL)
13694         oldctxt->nbentities += ctxt->nbentities;
13695 
13696     /*
13697      * Also record the last error if any
13698      */
13699     if (ctxt->lastError.code != XML_ERR_OK)
13700         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13701 
13702     ctxt->sax = oldsax;
13703     ctxt->dict = NULL;
13704     ctxt->attsDefault = NULL;
13705     ctxt->attsSpecial = NULL;
13706     xmlFreeParserCtxt(ctxt);
13707     if (newDoc != NULL) {
13708 	xmlFreeDoc(newDoc);
13709     }
13710 
13711     return(ret);
13712 }
13713 
13714 /**
13715  * xmlParseInNodeContext:
13716  * @node:  the context node
13717  * @data:  the input string
13718  * @datalen:  the input string length in bytes
13719  * @options:  a combination of xmlParserOption
13720  * @lst:  the return value for the set of parsed nodes
13721  *
13722  * Parse a well-balanced chunk of an XML document
13723  * within the context (DTD, namespaces, etc ...) of the given node.
13724  *
13725  * The allowed sequence for the data is a Well Balanced Chunk defined by
13726  * the content production in the XML grammar:
13727  *
13728  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13729  *
13730  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13731  * error code otherwise
13732  */
13733 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13734 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13735                       int options, xmlNodePtr *lst) {
13736 #ifdef SAX2
13737     xmlParserCtxtPtr ctxt;
13738     xmlDocPtr doc = NULL;
13739     xmlNodePtr fake, cur;
13740     int nsnr = 0;
13741 
13742     xmlParserErrors ret = XML_ERR_OK;
13743 
13744     /*
13745      * check all input parameters, grab the document
13746      */
13747     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13748         return(XML_ERR_INTERNAL_ERROR);
13749     switch (node->type) {
13750         case XML_ELEMENT_NODE:
13751         case XML_ATTRIBUTE_NODE:
13752         case XML_TEXT_NODE:
13753         case XML_CDATA_SECTION_NODE:
13754         case XML_ENTITY_REF_NODE:
13755         case XML_PI_NODE:
13756         case XML_COMMENT_NODE:
13757         case XML_DOCUMENT_NODE:
13758         case XML_HTML_DOCUMENT_NODE:
13759 	    break;
13760 	default:
13761 	    return(XML_ERR_INTERNAL_ERROR);
13762 
13763     }
13764     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13765            (node->type != XML_DOCUMENT_NODE) &&
13766 	   (node->type != XML_HTML_DOCUMENT_NODE))
13767 	node = node->parent;
13768     if (node == NULL)
13769 	return(XML_ERR_INTERNAL_ERROR);
13770     if (node->type == XML_ELEMENT_NODE)
13771 	doc = node->doc;
13772     else
13773         doc = (xmlDocPtr) node;
13774     if (doc == NULL)
13775 	return(XML_ERR_INTERNAL_ERROR);
13776 
13777     /*
13778      * allocate a context and set-up everything not related to the
13779      * node position in the tree
13780      */
13781     if (doc->type == XML_DOCUMENT_NODE)
13782 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13783 #ifdef LIBXML_HTML_ENABLED
13784     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13785 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13786         /*
13787          * When parsing in context, it makes no sense to add implied
13788          * elements like html/body/etc...
13789          */
13790         options |= HTML_PARSE_NOIMPLIED;
13791     }
13792 #endif
13793     else
13794         return(XML_ERR_INTERNAL_ERROR);
13795 
13796     if (ctxt == NULL)
13797         return(XML_ERR_NO_MEMORY);
13798 
13799     /*
13800      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13801      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13802      * we must wait until the last moment to free the original one.
13803      */
13804     if (doc->dict != NULL) {
13805         if (ctxt->dict != NULL)
13806 	    xmlDictFree(ctxt->dict);
13807 	ctxt->dict = doc->dict;
13808     } else
13809         options |= XML_PARSE_NODICT;
13810 
13811     if (doc->encoding != NULL) {
13812         xmlCharEncodingHandlerPtr hdlr;
13813 
13814         if (ctxt->encoding != NULL)
13815 	    xmlFree((xmlChar *) ctxt->encoding);
13816         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13817 
13818         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13819         if (hdlr != NULL) {
13820             xmlSwitchToEncoding(ctxt, hdlr);
13821 	} else {
13822             return(XML_ERR_UNSUPPORTED_ENCODING);
13823         }
13824     }
13825 
13826     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13827     xmlDetectSAX2(ctxt);
13828     ctxt->myDoc = doc;
13829     /* parsing in context, i.e. as within existing content */
13830     ctxt->instate = XML_PARSER_CONTENT;
13831 
13832     fake = xmlNewComment(NULL);
13833     if (fake == NULL) {
13834         xmlFreeParserCtxt(ctxt);
13835 	return(XML_ERR_NO_MEMORY);
13836     }
13837     xmlAddChild(node, fake);
13838 
13839     if (node->type == XML_ELEMENT_NODE) {
13840 	nodePush(ctxt, node);
13841 	/*
13842 	 * initialize the SAX2 namespaces stack
13843 	 */
13844 	cur = node;
13845 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13846 	    xmlNsPtr ns = cur->nsDef;
13847 	    const xmlChar *iprefix, *ihref;
13848 
13849 	    while (ns != NULL) {
13850 		if (ctxt->dict) {
13851 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13852 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13853 		} else {
13854 		    iprefix = ns->prefix;
13855 		    ihref = ns->href;
13856 		}
13857 
13858 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13859 		    nsPush(ctxt, iprefix, ihref);
13860 		    nsnr++;
13861 		}
13862 		ns = ns->next;
13863 	    }
13864 	    cur = cur->parent;
13865 	}
13866     }
13867 
13868     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13869 	/*
13870 	 * ID/IDREF registration will be done in xmlValidateElement below
13871 	 */
13872 	ctxt->loadsubset |= XML_SKIP_IDS;
13873     }
13874 
13875 #ifdef LIBXML_HTML_ENABLED
13876     if (doc->type == XML_HTML_DOCUMENT_NODE)
13877         __htmlParseContent(ctxt);
13878     else
13879 #endif
13880 	xmlParseContent(ctxt);
13881 
13882     nsPop(ctxt, nsnr);
13883     if ((RAW == '<') && (NXT(1) == '/')) {
13884 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13885     } else if (RAW != 0) {
13886 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13887     }
13888     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13889 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13890 	ctxt->wellFormed = 0;
13891     }
13892 
13893     if (!ctxt->wellFormed) {
13894         if (ctxt->errNo == 0)
13895 	    ret = XML_ERR_INTERNAL_ERROR;
13896 	else
13897 	    ret = (xmlParserErrors)ctxt->errNo;
13898     } else {
13899         ret = XML_ERR_OK;
13900     }
13901 
13902     /*
13903      * Return the newly created nodeset after unlinking it from
13904      * the pseudo sibling.
13905      */
13906 
13907     cur = fake->next;
13908     fake->next = NULL;
13909     node->last = fake;
13910 
13911     if (cur != NULL) {
13912 	cur->prev = NULL;
13913     }
13914 
13915     *lst = cur;
13916 
13917     while (cur != NULL) {
13918 	cur->parent = NULL;
13919 	cur = cur->next;
13920     }
13921 
13922     xmlUnlinkNode(fake);
13923     xmlFreeNode(fake);
13924 
13925 
13926     if (ret != XML_ERR_OK) {
13927         xmlFreeNodeList(*lst);
13928 	*lst = NULL;
13929     }
13930 
13931     if (doc->dict != NULL)
13932         ctxt->dict = NULL;
13933     xmlFreeParserCtxt(ctxt);
13934 
13935     return(ret);
13936 #else /* !SAX2 */
13937     return(XML_ERR_INTERNAL_ERROR);
13938 #endif
13939 }
13940 
13941 #ifdef LIBXML_SAX1_ENABLED
13942 /**
13943  * xmlParseBalancedChunkMemoryRecover:
13944  * @doc:  the document the chunk pertains to
13945  * @sax:  the SAX handler bloc (possibly NULL)
13946  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13947  * @depth:  Used for loop detection, use 0
13948  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13949  * @lst:  the return value for the set of parsed nodes
13950  * @recover: return nodes even if the data is broken (use 0)
13951  *
13952  *
13953  * Parse a well-balanced chunk of an XML document
13954  * called by the parser
13955  * The allowed sequence for the Well Balanced Chunk is the one defined by
13956  * the content production in the XML grammar:
13957  *
13958  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13959  *
13960  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13961  *    the parser error code otherwise
13962  *
13963  * In case recover is set to 1, the nodelist will not be empty even if
13964  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13965  * some extent.
13966  */
13967 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13968 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13969      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13970      int recover) {
13971     xmlParserCtxtPtr ctxt;
13972     xmlDocPtr newDoc;
13973     xmlSAXHandlerPtr oldsax = NULL;
13974     xmlNodePtr content, newRoot;
13975     int size;
13976     int ret = 0;
13977 
13978     if (depth > 40) {
13979 	return(XML_ERR_ENTITY_LOOP);
13980     }
13981 
13982 
13983     if (lst != NULL)
13984         *lst = NULL;
13985     if (string == NULL)
13986         return(-1);
13987 
13988     size = xmlStrlen(string);
13989 
13990     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13991     if (ctxt == NULL) return(-1);
13992     ctxt->userData = ctxt;
13993     if (sax != NULL) {
13994 	oldsax = ctxt->sax;
13995         ctxt->sax = sax;
13996 	if (user_data != NULL)
13997 	    ctxt->userData = user_data;
13998     }
13999     newDoc = xmlNewDoc(BAD_CAST "1.0");
14000     if (newDoc == NULL) {
14001 	xmlFreeParserCtxt(ctxt);
14002 	return(-1);
14003     }
14004     newDoc->properties = XML_DOC_INTERNAL;
14005     if ((doc != NULL) && (doc->dict != NULL)) {
14006         xmlDictFree(ctxt->dict);
14007 	ctxt->dict = doc->dict;
14008 	xmlDictReference(ctxt->dict);
14009 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14010 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14011 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14012 	ctxt->dictNames = 1;
14013     } else {
14014 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
14015     }
14016     if (doc != NULL) {
14017 	newDoc->intSubset = doc->intSubset;
14018 	newDoc->extSubset = doc->extSubset;
14019     }
14020     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14021     if (newRoot == NULL) {
14022 	if (sax != NULL)
14023 	    ctxt->sax = oldsax;
14024 	xmlFreeParserCtxt(ctxt);
14025 	newDoc->intSubset = NULL;
14026 	newDoc->extSubset = NULL;
14027         xmlFreeDoc(newDoc);
14028 	return(-1);
14029     }
14030     xmlAddChild((xmlNodePtr) newDoc, newRoot);
14031     nodePush(ctxt, newRoot);
14032     if (doc == NULL) {
14033 	ctxt->myDoc = newDoc;
14034     } else {
14035 	ctxt->myDoc = newDoc;
14036 	newDoc->children->doc = doc;
14037 	/* Ensure that doc has XML spec namespace */
14038 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14039 	newDoc->oldNs = doc->oldNs;
14040     }
14041     ctxt->instate = XML_PARSER_CONTENT;
14042     ctxt->depth = depth;
14043 
14044     /*
14045      * Doing validity checking on chunk doesn't make sense
14046      */
14047     ctxt->validate = 0;
14048     ctxt->loadsubset = 0;
14049     xmlDetectSAX2(ctxt);
14050 
14051     if ( doc != NULL ){
14052         content = doc->children;
14053         doc->children = NULL;
14054         xmlParseContent(ctxt);
14055         doc->children = content;
14056     }
14057     else {
14058         xmlParseContent(ctxt);
14059     }
14060     if ((RAW == '<') && (NXT(1) == '/')) {
14061 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14062     } else if (RAW != 0) {
14063 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
14064     }
14065     if (ctxt->node != newDoc->children) {
14066 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14067     }
14068 
14069     if (!ctxt->wellFormed) {
14070         if (ctxt->errNo == 0)
14071 	    ret = 1;
14072 	else
14073 	    ret = ctxt->errNo;
14074     } else {
14075       ret = 0;
14076     }
14077 
14078     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14079 	xmlNodePtr cur;
14080 
14081 	/*
14082 	 * Return the newly created nodeset after unlinking it from
14083 	 * they pseudo parent.
14084 	 */
14085 	cur = newDoc->children->children;
14086 	*lst = cur;
14087 	while (cur != NULL) {
14088 	    xmlSetTreeDoc(cur, doc);
14089 	    cur->parent = NULL;
14090 	    cur = cur->next;
14091 	}
14092 	newDoc->children->children = NULL;
14093     }
14094 
14095     if (sax != NULL)
14096 	ctxt->sax = oldsax;
14097     xmlFreeParserCtxt(ctxt);
14098     newDoc->intSubset = NULL;
14099     newDoc->extSubset = NULL;
14100     newDoc->oldNs = NULL;
14101     xmlFreeDoc(newDoc);
14102 
14103     return(ret);
14104 }
14105 
14106 /**
14107  * xmlSAXParseEntity:
14108  * @sax:  the SAX handler block
14109  * @filename:  the filename
14110  *
14111  * parse an XML external entity out of context and build a tree.
14112  * It use the given SAX function block to handle the parsing callback.
14113  * If sax is NULL, fallback to the default DOM tree building routines.
14114  *
14115  * [78] extParsedEnt ::= TextDecl? content
14116  *
14117  * This correspond to a "Well Balanced" chunk
14118  *
14119  * Returns the resulting document tree
14120  */
14121 
14122 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)14123 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14124     xmlDocPtr ret;
14125     xmlParserCtxtPtr ctxt;
14126 
14127     ctxt = xmlCreateFileParserCtxt(filename);
14128     if (ctxt == NULL) {
14129 	return(NULL);
14130     }
14131     if (sax != NULL) {
14132 	if (ctxt->sax != NULL)
14133 	    xmlFree(ctxt->sax);
14134         ctxt->sax = sax;
14135         ctxt->userData = NULL;
14136     }
14137 
14138     xmlParseExtParsedEnt(ctxt);
14139 
14140     if (ctxt->wellFormed)
14141 	ret = ctxt->myDoc;
14142     else {
14143         ret = NULL;
14144         xmlFreeDoc(ctxt->myDoc);
14145         ctxt->myDoc = NULL;
14146     }
14147     if (sax != NULL)
14148         ctxt->sax = NULL;
14149     xmlFreeParserCtxt(ctxt);
14150 
14151     return(ret);
14152 }
14153 
14154 /**
14155  * xmlParseEntity:
14156  * @filename:  the filename
14157  *
14158  * parse an XML external entity out of context and build a tree.
14159  *
14160  * [78] extParsedEnt ::= TextDecl? content
14161  *
14162  * This correspond to a "Well Balanced" chunk
14163  *
14164  * Returns the resulting document tree
14165  */
14166 
14167 xmlDocPtr
xmlParseEntity(const char * filename)14168 xmlParseEntity(const char *filename) {
14169     return(xmlSAXParseEntity(NULL, filename));
14170 }
14171 #endif /* LIBXML_SAX1_ENABLED */
14172 
14173 /**
14174  * xmlCreateEntityParserCtxtInternal:
14175  * @URL:  the entity URL
14176  * @ID:  the entity PUBLIC ID
14177  * @base:  a possible base for the target URI
14178  * @pctx:  parser context used to set options on new context
14179  *
14180  * Create a parser context for an external entity
14181  * Automatic support for ZLIB/Compress compressed document is provided
14182  * by default if found at compile-time.
14183  *
14184  * Returns the new parser context or NULL
14185  */
14186 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14187 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14188 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
14189     xmlParserCtxtPtr ctxt;
14190     xmlParserInputPtr inputStream;
14191     char *directory = NULL;
14192     xmlChar *uri;
14193 
14194     ctxt = xmlNewParserCtxt();
14195     if (ctxt == NULL) {
14196 	return(NULL);
14197     }
14198 
14199     if (pctx != NULL) {
14200         ctxt->options = pctx->options;
14201         ctxt->_private = pctx->_private;
14202     }
14203 
14204     uri = xmlBuildURI(URL, base);
14205 
14206     if (uri == NULL) {
14207 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14208 	if (inputStream == NULL) {
14209 	    xmlFreeParserCtxt(ctxt);
14210 	    return(NULL);
14211 	}
14212 
14213 	inputPush(ctxt, inputStream);
14214 
14215 	if ((ctxt->directory == NULL) && (directory == NULL))
14216 	    directory = xmlParserGetDirectory((char *)URL);
14217 	if ((ctxt->directory == NULL) && (directory != NULL))
14218 	    ctxt->directory = directory;
14219     } else {
14220 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14221 	if (inputStream == NULL) {
14222 	    xmlFree(uri);
14223 	    xmlFreeParserCtxt(ctxt);
14224 	    return(NULL);
14225 	}
14226 
14227 	inputPush(ctxt, inputStream);
14228 
14229 	if ((ctxt->directory == NULL) && (directory == NULL))
14230 	    directory = xmlParserGetDirectory((char *)uri);
14231 	if ((ctxt->directory == NULL) && (directory != NULL))
14232 	    ctxt->directory = directory;
14233 	xmlFree(uri);
14234     }
14235     return(ctxt);
14236 }
14237 
14238 /**
14239  * xmlCreateEntityParserCtxt:
14240  * @URL:  the entity URL
14241  * @ID:  the entity PUBLIC ID
14242  * @base:  a possible base for the target URI
14243  *
14244  * Create a parser context for an external entity
14245  * Automatic support for ZLIB/Compress compressed document is provided
14246  * by default if found at compile-time.
14247  *
14248  * Returns the new parser context or NULL
14249  */
14250 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14251 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14252 	                  const xmlChar *base) {
14253     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14254 
14255 }
14256 
14257 /************************************************************************
14258  *									*
14259  *		Front ends when parsing from a file			*
14260  *									*
14261  ************************************************************************/
14262 
14263 /**
14264  * xmlCreateURLParserCtxt:
14265  * @filename:  the filename or URL
14266  * @options:  a combination of xmlParserOption
14267  *
14268  * Create a parser context for a file or URL content.
14269  * Automatic support for ZLIB/Compress compressed document is provided
14270  * by default if found at compile-time and for file accesses
14271  *
14272  * Returns the new parser context or NULL
14273  */
14274 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14275 xmlCreateURLParserCtxt(const char *filename, int options)
14276 {
14277     xmlParserCtxtPtr ctxt;
14278     xmlParserInputPtr inputStream;
14279     char *directory = NULL;
14280 
14281     ctxt = xmlNewParserCtxt();
14282     if (ctxt == NULL) {
14283 	xmlErrMemory(NULL, "cannot allocate parser context");
14284 	return(NULL);
14285     }
14286 
14287     if (options)
14288 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14289     ctxt->linenumbers = 1;
14290 
14291     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14292     if (inputStream == NULL) {
14293 	xmlFreeParserCtxt(ctxt);
14294 	return(NULL);
14295     }
14296 
14297     inputPush(ctxt, inputStream);
14298     if ((ctxt->directory == NULL) && (directory == NULL))
14299         directory = xmlParserGetDirectory(filename);
14300     if ((ctxt->directory == NULL) && (directory != NULL))
14301         ctxt->directory = directory;
14302 
14303     return(ctxt);
14304 }
14305 
14306 /**
14307  * xmlCreateFileParserCtxt:
14308  * @filename:  the filename
14309  *
14310  * Create a parser context for a file content.
14311  * Automatic support for ZLIB/Compress compressed document is provided
14312  * by default if found at compile-time.
14313  *
14314  * Returns the new parser context or NULL
14315  */
14316 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14317 xmlCreateFileParserCtxt(const char *filename)
14318 {
14319     return(xmlCreateURLParserCtxt(filename, 0));
14320 }
14321 
14322 #ifdef LIBXML_SAX1_ENABLED
14323 /**
14324  * xmlSAXParseFileWithData:
14325  * @sax:  the SAX handler block
14326  * @filename:  the filename
14327  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14328  *             documents
14329  * @data:  the userdata
14330  *
14331  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14332  * compressed document is provided by default if found at compile-time.
14333  * It use the given SAX function block to handle the parsing callback.
14334  * If sax is NULL, fallback to the default DOM tree building routines.
14335  *
14336  * User data (void *) is stored within the parser context in the
14337  * context's _private member, so it is available nearly everywhere in libxml
14338  *
14339  * Returns the resulting document tree
14340  */
14341 
14342 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14343 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14344                         int recovery, void *data) {
14345     xmlDocPtr ret;
14346     xmlParserCtxtPtr ctxt;
14347 
14348     xmlInitParser();
14349 
14350     ctxt = xmlCreateFileParserCtxt(filename);
14351     if (ctxt == NULL) {
14352 	return(NULL);
14353     }
14354     if (sax != NULL) {
14355 	if (ctxt->sax != NULL)
14356 	    xmlFree(ctxt->sax);
14357         ctxt->sax = sax;
14358     }
14359     xmlDetectSAX2(ctxt);
14360     if (data!=NULL) {
14361 	ctxt->_private = data;
14362     }
14363 
14364     if (ctxt->directory == NULL)
14365         ctxt->directory = xmlParserGetDirectory(filename);
14366 
14367     ctxt->recovery = recovery;
14368 
14369     xmlParseDocument(ctxt);
14370 
14371     if ((ctxt->wellFormed) || recovery) {
14372         ret = ctxt->myDoc;
14373 	if (ret != NULL) {
14374 	    if (ctxt->input->buf->compressed > 0)
14375 		ret->compression = 9;
14376 	    else
14377 		ret->compression = ctxt->input->buf->compressed;
14378 	}
14379     }
14380     else {
14381        ret = NULL;
14382        xmlFreeDoc(ctxt->myDoc);
14383        ctxt->myDoc = NULL;
14384     }
14385     if (sax != NULL)
14386         ctxt->sax = NULL;
14387     xmlFreeParserCtxt(ctxt);
14388 
14389     return(ret);
14390 }
14391 
14392 /**
14393  * xmlSAXParseFile:
14394  * @sax:  the SAX handler block
14395  * @filename:  the filename
14396  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14397  *             documents
14398  *
14399  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14400  * compressed document is provided by default if found at compile-time.
14401  * It use the given SAX function block to handle the parsing callback.
14402  * If sax is NULL, fallback to the default DOM tree building routines.
14403  *
14404  * Returns the resulting document tree
14405  */
14406 
14407 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14408 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14409                           int recovery) {
14410     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14411 }
14412 
14413 /**
14414  * xmlRecoverDoc:
14415  * @cur:  a pointer to an array of xmlChar
14416  *
14417  * parse an XML in-memory document and build a tree.
14418  * In the case the document is not Well Formed, a attempt to build a
14419  * tree is tried anyway
14420  *
14421  * Returns the resulting document tree or NULL in case of failure
14422  */
14423 
14424 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14425 xmlRecoverDoc(const xmlChar *cur) {
14426     return(xmlSAXParseDoc(NULL, cur, 1));
14427 }
14428 
14429 /**
14430  * xmlParseFile:
14431  * @filename:  the filename
14432  *
14433  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14434  * compressed document is provided by default if found at compile-time.
14435  *
14436  * Returns the resulting document tree if the file was wellformed,
14437  * NULL otherwise.
14438  */
14439 
14440 xmlDocPtr
xmlParseFile(const char * filename)14441 xmlParseFile(const char *filename) {
14442     return(xmlSAXParseFile(NULL, filename, 0));
14443 }
14444 
14445 /**
14446  * xmlRecoverFile:
14447  * @filename:  the filename
14448  *
14449  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14450  * compressed document is provided by default if found at compile-time.
14451  * In the case the document is not Well Formed, it attempts to build
14452  * a tree anyway
14453  *
14454  * Returns the resulting document tree or NULL in case of failure
14455  */
14456 
14457 xmlDocPtr
xmlRecoverFile(const char * filename)14458 xmlRecoverFile(const char *filename) {
14459     return(xmlSAXParseFile(NULL, filename, 1));
14460 }
14461 
14462 
14463 /**
14464  * xmlSetupParserForBuffer:
14465  * @ctxt:  an XML parser context
14466  * @buffer:  a xmlChar * buffer
14467  * @filename:  a file name
14468  *
14469  * Setup the parser context to parse a new buffer; Clears any prior
14470  * contents from the parser context. The buffer parameter must not be
14471  * NULL, but the filename parameter can be
14472  */
14473 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14474 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14475                              const char* filename)
14476 {
14477     xmlParserInputPtr input;
14478 
14479     if ((ctxt == NULL) || (buffer == NULL))
14480         return;
14481 
14482     input = xmlNewInputStream(ctxt);
14483     if (input == NULL) {
14484         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14485         xmlClearParserCtxt(ctxt);
14486         return;
14487     }
14488 
14489     xmlClearParserCtxt(ctxt);
14490     if (filename != NULL)
14491         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14492     input->base = buffer;
14493     input->cur = buffer;
14494     input->end = &buffer[xmlStrlen(buffer)];
14495     inputPush(ctxt, input);
14496 }
14497 
14498 /**
14499  * xmlSAXUserParseFile:
14500  * @sax:  a SAX handler
14501  * @user_data:  The user data returned on SAX callbacks
14502  * @filename:  a file name
14503  *
14504  * parse an XML file and call the given SAX handler routines.
14505  * Automatic support for ZLIB/Compress compressed document is provided
14506  *
14507  * Returns 0 in case of success or a error number otherwise
14508  */
14509 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14510 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14511                     const char *filename) {
14512     int ret = 0;
14513     xmlParserCtxtPtr ctxt;
14514 
14515     ctxt = xmlCreateFileParserCtxt(filename);
14516     if (ctxt == NULL) return -1;
14517     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14518 	xmlFree(ctxt->sax);
14519     ctxt->sax = sax;
14520     xmlDetectSAX2(ctxt);
14521 
14522     if (user_data != NULL)
14523 	ctxt->userData = user_data;
14524 
14525     xmlParseDocument(ctxt);
14526 
14527     if (ctxt->wellFormed)
14528 	ret = 0;
14529     else {
14530         if (ctxt->errNo != 0)
14531 	    ret = ctxt->errNo;
14532 	else
14533 	    ret = -1;
14534     }
14535     if (sax != NULL)
14536 	ctxt->sax = NULL;
14537     if (ctxt->myDoc != NULL) {
14538         xmlFreeDoc(ctxt->myDoc);
14539 	ctxt->myDoc = NULL;
14540     }
14541     xmlFreeParserCtxt(ctxt);
14542 
14543     return ret;
14544 }
14545 #endif /* LIBXML_SAX1_ENABLED */
14546 
14547 /************************************************************************
14548  *									*
14549  *		Front ends when parsing from memory			*
14550  *									*
14551  ************************************************************************/
14552 
14553 /**
14554  * xmlCreateMemoryParserCtxt:
14555  * @buffer:  a pointer to a char array
14556  * @size:  the size of the array
14557  *
14558  * Create a parser context for an XML in-memory document.
14559  *
14560  * Returns the new parser context or NULL
14561  */
14562 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14563 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14564     xmlParserCtxtPtr ctxt;
14565     xmlParserInputPtr input;
14566     xmlParserInputBufferPtr buf;
14567 
14568     if (buffer == NULL)
14569 	return(NULL);
14570     if (size <= 0)
14571 	return(NULL);
14572 
14573     ctxt = xmlNewParserCtxt();
14574     if (ctxt == NULL)
14575 	return(NULL);
14576 
14577     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14578     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14579     if (buf == NULL) {
14580 	xmlFreeParserCtxt(ctxt);
14581 	return(NULL);
14582     }
14583 
14584     input = xmlNewInputStream(ctxt);
14585     if (input == NULL) {
14586 	xmlFreeParserInputBuffer(buf);
14587 	xmlFreeParserCtxt(ctxt);
14588 	return(NULL);
14589     }
14590 
14591     input->filename = NULL;
14592     input->buf = buf;
14593     xmlBufResetInput(input->buf->buffer, input);
14594 
14595     inputPush(ctxt, input);
14596     return(ctxt);
14597 }
14598 
14599 #ifdef LIBXML_SAX1_ENABLED
14600 /**
14601  * xmlSAXParseMemoryWithData:
14602  * @sax:  the SAX handler block
14603  * @buffer:  an pointer to a char array
14604  * @size:  the size of the array
14605  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14606  *             documents
14607  * @data:  the userdata
14608  *
14609  * parse an XML in-memory block and use the given SAX function block
14610  * to handle the parsing callback. If sax is NULL, fallback to the default
14611  * DOM tree building routines.
14612  *
14613  * User data (void *) is stored within the parser context in the
14614  * context's _private member, so it is available nearly everywhere in libxml
14615  *
14616  * Returns the resulting document tree
14617  */
14618 
14619 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14620 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14621 	          int size, int recovery, void *data) {
14622     xmlDocPtr ret;
14623     xmlParserCtxtPtr ctxt;
14624 
14625     xmlInitParser();
14626 
14627     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14628     if (ctxt == NULL) return(NULL);
14629     if (sax != NULL) {
14630 	if (ctxt->sax != NULL)
14631 	    xmlFree(ctxt->sax);
14632         ctxt->sax = sax;
14633     }
14634     xmlDetectSAX2(ctxt);
14635     if (data!=NULL) {
14636 	ctxt->_private=data;
14637     }
14638 
14639     ctxt->recovery = recovery;
14640 
14641     xmlParseDocument(ctxt);
14642 
14643     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14644     else {
14645        ret = NULL;
14646        xmlFreeDoc(ctxt->myDoc);
14647        ctxt->myDoc = NULL;
14648     }
14649     if (sax != NULL)
14650 	ctxt->sax = NULL;
14651     xmlFreeParserCtxt(ctxt);
14652 
14653     return(ret);
14654 }
14655 
14656 /**
14657  * xmlSAXParseMemory:
14658  * @sax:  the SAX handler block
14659  * @buffer:  an pointer to a char array
14660  * @size:  the size of the array
14661  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14662  *             documents
14663  *
14664  * parse an XML in-memory block and use the given SAX function block
14665  * to handle the parsing callback. If sax is NULL, fallback to the default
14666  * DOM tree building routines.
14667  *
14668  * Returns the resulting document tree
14669  */
14670 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14671 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14672 	          int size, int recovery) {
14673     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14674 }
14675 
14676 /**
14677  * xmlParseMemory:
14678  * @buffer:  an pointer to a char array
14679  * @size:  the size of the array
14680  *
14681  * parse an XML in-memory block and build a tree.
14682  *
14683  * Returns the resulting document tree
14684  */
14685 
xmlParseMemory(const char * buffer,int size)14686 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14687    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14688 }
14689 
14690 /**
14691  * xmlRecoverMemory:
14692  * @buffer:  an pointer to a char array
14693  * @size:  the size of the array
14694  *
14695  * parse an XML in-memory block and build a tree.
14696  * In the case the document is not Well Formed, an attempt to
14697  * build a tree is tried anyway
14698  *
14699  * Returns the resulting document tree or NULL in case of error
14700  */
14701 
xmlRecoverMemory(const char * buffer,int size)14702 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14703    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14704 }
14705 
14706 /**
14707  * xmlSAXUserParseMemory:
14708  * @sax:  a SAX handler
14709  * @user_data:  The user data returned on SAX callbacks
14710  * @buffer:  an in-memory XML document input
14711  * @size:  the length of the XML document in bytes
14712  *
14713  * A better SAX parsing routine.
14714  * parse an XML in-memory buffer and call the given SAX handler routines.
14715  *
14716  * Returns 0 in case of success or a error number otherwise
14717  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14718 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14719 			  const char *buffer, int size) {
14720     int ret = 0;
14721     xmlParserCtxtPtr ctxt;
14722 
14723     xmlInitParser();
14724 
14725     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14726     if (ctxt == NULL) return -1;
14727     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14728         xmlFree(ctxt->sax);
14729     ctxt->sax = sax;
14730     xmlDetectSAX2(ctxt);
14731 
14732     if (user_data != NULL)
14733 	ctxt->userData = user_data;
14734 
14735     xmlParseDocument(ctxt);
14736 
14737     if (ctxt->wellFormed)
14738 	ret = 0;
14739     else {
14740         if (ctxt->errNo != 0)
14741 	    ret = ctxt->errNo;
14742 	else
14743 	    ret = -1;
14744     }
14745     if (sax != NULL)
14746         ctxt->sax = NULL;
14747     if (ctxt->myDoc != NULL) {
14748         xmlFreeDoc(ctxt->myDoc);
14749 	ctxt->myDoc = NULL;
14750     }
14751     xmlFreeParserCtxt(ctxt);
14752 
14753     return ret;
14754 }
14755 #endif /* LIBXML_SAX1_ENABLED */
14756 
14757 /**
14758  * xmlCreateDocParserCtxt:
14759  * @cur:  a pointer to an array of xmlChar
14760  *
14761  * Creates a parser context for an XML in-memory document.
14762  *
14763  * Returns the new parser context or NULL
14764  */
14765 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14766 xmlCreateDocParserCtxt(const xmlChar *cur) {
14767     int len;
14768 
14769     if (cur == NULL)
14770 	return(NULL);
14771     len = xmlStrlen(cur);
14772     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14773 }
14774 
14775 #ifdef LIBXML_SAX1_ENABLED
14776 /**
14777  * xmlSAXParseDoc:
14778  * @sax:  the SAX handler block
14779  * @cur:  a pointer to an array of xmlChar
14780  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14781  *             documents
14782  *
14783  * parse an XML in-memory document and build a tree.
14784  * It use the given SAX function block to handle the parsing callback.
14785  * If sax is NULL, fallback to the default DOM tree building routines.
14786  *
14787  * Returns the resulting document tree
14788  */
14789 
14790 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14791 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14792     xmlDocPtr ret;
14793     xmlParserCtxtPtr ctxt;
14794     xmlSAXHandlerPtr oldsax = NULL;
14795 
14796     if (cur == NULL) return(NULL);
14797 
14798 
14799     ctxt = xmlCreateDocParserCtxt(cur);
14800     if (ctxt == NULL) return(NULL);
14801     if (sax != NULL) {
14802         oldsax = ctxt->sax;
14803         ctxt->sax = sax;
14804         ctxt->userData = NULL;
14805     }
14806     xmlDetectSAX2(ctxt);
14807 
14808     xmlParseDocument(ctxt);
14809     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14810     else {
14811        ret = NULL;
14812        xmlFreeDoc(ctxt->myDoc);
14813        ctxt->myDoc = NULL;
14814     }
14815     if (sax != NULL)
14816 	ctxt->sax = oldsax;
14817     xmlFreeParserCtxt(ctxt);
14818 
14819     return(ret);
14820 }
14821 
14822 /**
14823  * xmlParseDoc:
14824  * @cur:  a pointer to an array of xmlChar
14825  *
14826  * parse an XML in-memory document and build a tree.
14827  *
14828  * Returns the resulting document tree
14829  */
14830 
14831 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14832 xmlParseDoc(const xmlChar *cur) {
14833     return(xmlSAXParseDoc(NULL, cur, 0));
14834 }
14835 #endif /* LIBXML_SAX1_ENABLED */
14836 
14837 #ifdef LIBXML_LEGACY_ENABLED
14838 /************************************************************************
14839  *									*
14840  *	Specific function to keep track of entities references		*
14841  *	and used by the XSLT debugger					*
14842  *									*
14843  ************************************************************************/
14844 
/* Entity reference callback set by xmlSetEntityReferenceFunc(); NULL when none is registered. */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14846 
14847 /**
14848  * xmlAddEntityReference:
14849  * @ent : A valid entity
14850  * @firstNode : A valid first node for children of entity
14851  * @lastNode : A valid last node of children entity
14852  *
14853  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14854  */
14855 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14856 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14857                       xmlNodePtr lastNode)
14858 {
14859     if (xmlEntityRefFunc != NULL) {
14860         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14861     }
14862 }
14863 
14864 
14865 /**
14866  * xmlSetEntityReferenceFunc:
14867  * @func: A valid function
14868  *
14869  * Set the function to call call back when a xml reference has been made
14870  */
14871 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14872 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14873 {
14874     xmlEntityRefFunc = func;
14875 }
14876 #endif /* LIBXML_LEGACY_ENABLED */
14877 
14878 /************************************************************************
14879  *									*
14880  *				Miscellaneous				*
14881  *									*
14882  ************************************************************************/
14883 
14884 #ifdef LIBXML_XPATH_ENABLED
14885 #include <libxml/xpath.h>
14886 #endif
14887 
/* Generic error handler compared against xmlGenericError in xmlInitParser(). */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14890 
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The function returns immediately when the library is already marked
 * as initialized. Otherwise it runs the initialization routines of the
 * various modules (threads, globals, memory, dictionary, encoding
 * handlers, default SAX handler, I/O callbacks and, depending on the
 * compile-time options, output, HTML and XPath) and then sets the
 * initialized flag.
 */

void
xmlInitParser(void) {
    /* Fast path: nothing to do once initialization has completed. */
    if (xmlParserInitialized != 0)
	return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support the flag is tested again while holding the
     * global init mutex, so the block below is skipped if the flag was
     * set between the first test and the lock acquisition.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitThreads();
	xmlInitGlobals();
	/* (Re)install the default generic error handler only when no
	 * custom one is currently set. */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
        xmlInitializeDict();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* Mark the library initialized only after every step has run. */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14934 
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The call is a no-op when the library is not currently marked as
 * initialized. On completion the initialized flag is cleared, so a
 * later xmlInitParser() call runs the initialization again.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing was set up by xmlInitParser(), so nothing to release. */
    if (!xmlParserInitialized)
	return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag tested by xmlInitParser(). */
    xmlParserInitialized = 0;
}
14981 
14982 /************************************************************************
14983  *									*
14984  *	New set (2.6.0) of simpler and more flexible APIs		*
14985  *									*
14986  ************************************************************************/
14987 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point of
 * use; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can be used safely in an unbraced if/else.
 * Invocations must be followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14999 
15000 /**
15001  * xmlCtxtReset:
15002  * @ctxt: an XML parser context
15003  *
15004  * Reset a parser context
15005  */
15006 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)15007 xmlCtxtReset(xmlParserCtxtPtr ctxt)
15008 {
15009     xmlParserInputPtr input;
15010     xmlDictPtr dict;
15011 
15012     if (ctxt == NULL)
15013         return;
15014 
15015     dict = ctxt->dict;
15016 
15017     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15018         xmlFreeInputStream(input);
15019     }
15020     ctxt->inputNr = 0;
15021     ctxt->input = NULL;
15022 
15023     ctxt->spaceNr = 0;
15024     if (ctxt->spaceTab != NULL) {
15025 	ctxt->spaceTab[0] = -1;
15026 	ctxt->space = &ctxt->spaceTab[0];
15027     } else {
15028         ctxt->space = NULL;
15029     }
15030 
15031 
15032     ctxt->nodeNr = 0;
15033     ctxt->node = NULL;
15034 
15035     ctxt->nameNr = 0;
15036     ctxt->name = NULL;
15037 
15038     DICT_FREE(ctxt->version);
15039     ctxt->version = NULL;
15040     DICT_FREE(ctxt->encoding);
15041     ctxt->encoding = NULL;
15042     DICT_FREE(ctxt->directory);
15043     ctxt->directory = NULL;
15044     DICT_FREE(ctxt->extSubURI);
15045     ctxt->extSubURI = NULL;
15046     DICT_FREE(ctxt->extSubSystem);
15047     ctxt->extSubSystem = NULL;
15048     if (ctxt->myDoc != NULL)
15049         xmlFreeDoc(ctxt->myDoc);
15050     ctxt->myDoc = NULL;
15051 
15052     ctxt->standalone = -1;
15053     ctxt->hasExternalSubset = 0;
15054     ctxt->hasPErefs = 0;
15055     ctxt->html = 0;
15056     ctxt->external = 0;
15057     ctxt->instate = XML_PARSER_START;
15058     ctxt->token = 0;
15059 
15060     ctxt->wellFormed = 1;
15061     ctxt->nsWellFormed = 1;
15062     ctxt->disableSAX = 0;
15063     ctxt->valid = 1;
15064 #if 0
15065     ctxt->vctxt.userData = ctxt;
15066     ctxt->vctxt.error = xmlParserValidityError;
15067     ctxt->vctxt.warning = xmlParserValidityWarning;
15068 #endif
15069     ctxt->record_info = 0;
15070     ctxt->nbChars = 0;
15071     ctxt->checkIndex = 0;
15072     ctxt->inSubset = 0;
15073     ctxt->errNo = XML_ERR_OK;
15074     ctxt->depth = 0;
15075     ctxt->charset = XML_CHAR_ENCODING_UTF8;
15076     ctxt->catalogs = NULL;
15077     ctxt->nbentities = 0;
15078     ctxt->sizeentities = 0;
15079     ctxt->sizeentcopy = 0;
15080     xmlInitNodeInfoSeq(&ctxt->node_seq);
15081 
15082     if (ctxt->attsDefault != NULL) {
15083         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15084         ctxt->attsDefault = NULL;
15085     }
15086     if (ctxt->attsSpecial != NULL) {
15087         xmlHashFree(ctxt->attsSpecial, NULL);
15088         ctxt->attsSpecial = NULL;
15089     }
15090 
15091 #ifdef LIBXML_CATALOG_ENABLED
15092     if (ctxt->catalogs != NULL)
15093 	xmlCatalogFreeLocal(ctxt->catalogs);
15094 #endif
15095     if (ctxt->lastError.code != XML_ERR_OK)
15096         xmlResetError(&ctxt->lastError);
15097 }
15098 
15099 /**
15100  * xmlCtxtResetPush:
15101  * @ctxt: an XML parser context
15102  * @chunk:  a pointer to an array of chars
15103  * @size:  number of chars in the array
15104  * @filename:  an optional file name or URI
15105  * @encoding:  the document encoding, or NULL
15106  *
15107  * Reset a push parser context
15108  *
15109  * Returns 0 in case of success and 1 in case of error
15110  */
15111 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)15112 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15113                  int size, const char *filename, const char *encoding)
15114 {
15115     xmlParserInputPtr inputStream;
15116     xmlParserInputBufferPtr buf;
15117     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15118 
15119     if (ctxt == NULL)
15120         return(1);
15121 
15122     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15123         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15124 
15125     buf = xmlAllocParserInputBuffer(enc);
15126     if (buf == NULL)
15127         return(1);
15128 
15129     if (ctxt == NULL) {
15130         xmlFreeParserInputBuffer(buf);
15131         return(1);
15132     }
15133 
15134     xmlCtxtReset(ctxt);
15135 
15136     if (ctxt->pushTab == NULL) {
15137         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15138 	                                    sizeof(xmlChar *));
15139         if (ctxt->pushTab == NULL) {
15140 	    xmlErrMemory(ctxt, NULL);
15141             xmlFreeParserInputBuffer(buf);
15142             return(1);
15143         }
15144     }
15145 
15146     if (filename == NULL) {
15147         ctxt->directory = NULL;
15148     } else {
15149         ctxt->directory = xmlParserGetDirectory(filename);
15150     }
15151 
15152     inputStream = xmlNewInputStream(ctxt);
15153     if (inputStream == NULL) {
15154         xmlFreeParserInputBuffer(buf);
15155         return(1);
15156     }
15157 
15158     if (filename == NULL)
15159         inputStream->filename = NULL;
15160     else
15161         inputStream->filename = (char *)
15162             xmlCanonicPath((const xmlChar *) filename);
15163     inputStream->buf = buf;
15164     xmlBufResetInput(buf->buffer, inputStream);
15165 
15166     inputPush(ctxt, inputStream);
15167 
15168     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15169         (ctxt->input->buf != NULL)) {
15170 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15171         size_t cur = ctxt->input->cur - ctxt->input->base;
15172 
15173         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15174 
15175         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15176 #ifdef DEBUG_PUSH
15177         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15178 #endif
15179     }
15180 
15181     if (encoding != NULL) {
15182         xmlCharEncodingHandlerPtr hdlr;
15183 
15184         if (ctxt->encoding != NULL)
15185 	    xmlFree((xmlChar *) ctxt->encoding);
15186         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15187 
15188         hdlr = xmlFindCharEncodingHandler(encoding);
15189         if (hdlr != NULL) {
15190             xmlSwitchToEncoding(ctxt, hdlr);
15191 	} else {
15192 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15193 			      "Unsupported encoding %s\n", BAD_CAST encoding);
15194         }
15195     } else if (enc != XML_CHAR_ENCODING_NONE) {
15196         xmlSwitchEncoding(ctxt, enc);
15197     }
15198 
15199     return(0);
15200 }
15201 
15202 
15203 /**
15204  * xmlCtxtUseOptionsInternal:
15205  * @ctxt: an XML parser context
15206  * @options:  a combination of xmlParserOption
15207  * @encoding:  the user provided encoding to use
15208  *
15209  * Applies the options to the parser context
15210  *
15211  * Returns 0 in case of success, the set of unknown or unimplemented options
15212  *         in case of error.
15213  */
15214 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15216 {
15217     if (ctxt == NULL)
15218         return(-1);
15219     if (encoding != NULL) {
15220         if (ctxt->encoding != NULL)
15221 	    xmlFree((xmlChar *) ctxt->encoding);
15222         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15223     }
15224     if (options & XML_PARSE_RECOVER) {
15225         ctxt->recovery = 1;
15226         options -= XML_PARSE_RECOVER;
15227 	ctxt->options |= XML_PARSE_RECOVER;
15228     } else
15229         ctxt->recovery = 0;
15230     if (options & XML_PARSE_DTDLOAD) {
15231         ctxt->loadsubset = XML_DETECT_IDS;
15232         options -= XML_PARSE_DTDLOAD;
15233 	ctxt->options |= XML_PARSE_DTDLOAD;
15234     } else
15235         ctxt->loadsubset = 0;
15236     if (options & XML_PARSE_DTDATTR) {
15237         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15238         options -= XML_PARSE_DTDATTR;
15239 	ctxt->options |= XML_PARSE_DTDATTR;
15240     }
15241     if (options & XML_PARSE_NOENT) {
15242         ctxt->replaceEntities = 1;
15243         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15244         options -= XML_PARSE_NOENT;
15245 	ctxt->options |= XML_PARSE_NOENT;
15246     } else
15247         ctxt->replaceEntities = 0;
15248     if (options & XML_PARSE_PEDANTIC) {
15249         ctxt->pedantic = 1;
15250         options -= XML_PARSE_PEDANTIC;
15251 	ctxt->options |= XML_PARSE_PEDANTIC;
15252     } else
15253         ctxt->pedantic = 0;
15254     if (options & XML_PARSE_NOBLANKS) {
15255         ctxt->keepBlanks = 0;
15256         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15257         options -= XML_PARSE_NOBLANKS;
15258 	ctxt->options |= XML_PARSE_NOBLANKS;
15259     } else
15260         ctxt->keepBlanks = 1;
15261     if (options & XML_PARSE_DTDVALID) {
15262         ctxt->validate = 1;
15263         if (options & XML_PARSE_NOWARNING)
15264             ctxt->vctxt.warning = NULL;
15265         if (options & XML_PARSE_NOERROR)
15266             ctxt->vctxt.error = NULL;
15267         options -= XML_PARSE_DTDVALID;
15268 	ctxt->options |= XML_PARSE_DTDVALID;
15269     } else
15270         ctxt->validate = 0;
15271     if (options & XML_PARSE_NOWARNING) {
15272         ctxt->sax->warning = NULL;
15273         options -= XML_PARSE_NOWARNING;
15274     }
15275     if (options & XML_PARSE_NOERROR) {
15276         ctxt->sax->error = NULL;
15277         ctxt->sax->fatalError = NULL;
15278         options -= XML_PARSE_NOERROR;
15279     }
15280 #ifdef LIBXML_SAX1_ENABLED
15281     if (options & XML_PARSE_SAX1) {
15282         ctxt->sax->startElement = xmlSAX2StartElement;
15283         ctxt->sax->endElement = xmlSAX2EndElement;
15284         ctxt->sax->startElementNs = NULL;
15285         ctxt->sax->endElementNs = NULL;
15286         ctxt->sax->initialized = 1;
15287         options -= XML_PARSE_SAX1;
15288 	ctxt->options |= XML_PARSE_SAX1;
15289     }
15290 #endif /* LIBXML_SAX1_ENABLED */
15291     if (options & XML_PARSE_NODICT) {
15292         ctxt->dictNames = 0;
15293         options -= XML_PARSE_NODICT;
15294 	ctxt->options |= XML_PARSE_NODICT;
15295     } else {
15296         ctxt->dictNames = 1;
15297     }
15298     if (options & XML_PARSE_NOCDATA) {
15299         ctxt->sax->cdataBlock = NULL;
15300         options -= XML_PARSE_NOCDATA;
15301 	ctxt->options |= XML_PARSE_NOCDATA;
15302     }
15303     if (options & XML_PARSE_NSCLEAN) {
15304 	ctxt->options |= XML_PARSE_NSCLEAN;
15305         options -= XML_PARSE_NSCLEAN;
15306     }
15307     if (options & XML_PARSE_NONET) {
15308 	ctxt->options |= XML_PARSE_NONET;
15309         options -= XML_PARSE_NONET;
15310     }
15311     if (options & XML_PARSE_COMPACT) {
15312 	ctxt->options |= XML_PARSE_COMPACT;
15313         options -= XML_PARSE_COMPACT;
15314     }
15315     if (options & XML_PARSE_OLD10) {
15316 	ctxt->options |= XML_PARSE_OLD10;
15317         options -= XML_PARSE_OLD10;
15318     }
15319     if (options & XML_PARSE_NOBASEFIX) {
15320 	ctxt->options |= XML_PARSE_NOBASEFIX;
15321         options -= XML_PARSE_NOBASEFIX;
15322     }
15323     if (options & XML_PARSE_HUGE) {
15324 	ctxt->options |= XML_PARSE_HUGE;
15325         options -= XML_PARSE_HUGE;
15326         if (ctxt->dict != NULL)
15327             xmlDictSetLimit(ctxt->dict, 0);
15328     }
15329     if (options & XML_PARSE_OLDSAX) {
15330 	ctxt->options |= XML_PARSE_OLDSAX;
15331         options -= XML_PARSE_OLDSAX;
15332     }
15333     if (options & XML_PARSE_IGNORE_ENC) {
15334 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15335         options -= XML_PARSE_IGNORE_ENC;
15336     }
15337     if (options & XML_PARSE_BIG_LINES) {
15338 	ctxt->options |= XML_PARSE_BIG_LINES;
15339         options -= XML_PARSE_BIG_LINES;
15340     }
15341     ctxt->linenumbers = 1;
15342     return (options);
15343 }
15344 
15345 /**
15346  * xmlCtxtUseOptions:
15347  * @ctxt: an XML parser context
15348  * @options:  a combination of xmlParserOption
15349  *
15350  * Applies the options to the parser context
15351  *
15352  * Returns 0 in case of success, the set of unknown or unimplemented options
15353  *         in case of error.
15354  */
15355 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15356 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15357 {
15358    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15359 }
15360 
15361 /**
15362  * xmlDoRead:
15363  * @ctxt:  an XML parser context
15364  * @URL:  the base URL to use for the document
15365  * @encoding:  the document encoding, or NULL
15366  * @options:  a combination of xmlParserOption
15367  * @reuse:  keep the context for reuse
15368  *
15369  * Common front-end for the xmlRead functions
15370  *
15371  * Returns the resulting document tree or NULL
15372  */
15373 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15374 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15375           int options, int reuse)
15376 {
15377     xmlDocPtr ret;
15378 
15379     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15380     if (encoding != NULL) {
15381         xmlCharEncodingHandlerPtr hdlr;
15382 
15383 	hdlr = xmlFindCharEncodingHandler(encoding);
15384 	if (hdlr != NULL)
15385 	    xmlSwitchToEncoding(ctxt, hdlr);
15386     }
15387     if ((URL != NULL) && (ctxt->input != NULL) &&
15388         (ctxt->input->filename == NULL))
15389         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15390     xmlParseDocument(ctxt);
15391     if ((ctxt->wellFormed) || ctxt->recovery)
15392         ret = ctxt->myDoc;
15393     else {
15394         ret = NULL;
15395 	if (ctxt->myDoc != NULL) {
15396 	    xmlFreeDoc(ctxt->myDoc);
15397 	}
15398     }
15399     ctxt->myDoc = NULL;
15400     if (!reuse) {
15401 	xmlFreeParserCtxt(ctxt);
15402     }
15403 
15404     return (ret);
15405 }
15406 
15407 /**
15408  * xmlReadDoc:
15409  * @cur:  a pointer to a zero terminated string
15410  * @URL:  the base URL to use for the document
15411  * @encoding:  the document encoding, or NULL
15412  * @options:  a combination of xmlParserOption
15413  *
15414  * parse an XML in-memory document and build a tree.
15415  *
15416  * Returns the resulting document tree
15417  */
15418 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15419 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15420 {
15421     xmlParserCtxtPtr ctxt;
15422 
15423     if (cur == NULL)
15424         return (NULL);
15425     xmlInitParser();
15426 
15427     ctxt = xmlCreateDocParserCtxt(cur);
15428     if (ctxt == NULL)
15429         return (NULL);
15430     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15431 }
15432 
15433 /**
15434  * xmlReadFile:
15435  * @filename:  a file or URL
15436  * @encoding:  the document encoding, or NULL
15437  * @options:  a combination of xmlParserOption
15438  *
15439  * parse an XML file from the filesystem or the network.
15440  *
15441  * Returns the resulting document tree
15442  */
15443 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15444 xmlReadFile(const char *filename, const char *encoding, int options)
15445 {
15446     xmlParserCtxtPtr ctxt;
15447 
15448     xmlInitParser();
15449     ctxt = xmlCreateURLParserCtxt(filename, options);
15450     if (ctxt == NULL)
15451         return (NULL);
15452     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15453 }
15454 
15455 /**
15456  * xmlReadMemory:
15457  * @buffer:  a pointer to a char array
15458  * @size:  the size of the array
15459  * @URL:  the base URL to use for the document
15460  * @encoding:  the document encoding, or NULL
15461  * @options:  a combination of xmlParserOption
15462  *
15463  * parse an XML in-memory document and build a tree.
15464  *
15465  * Returns the resulting document tree
15466  */
15467 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15468 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15469 {
15470     xmlParserCtxtPtr ctxt;
15471 
15472     xmlInitParser();
15473     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15474     if (ctxt == NULL)
15475         return (NULL);
15476     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15477 }
15478 
15479 /**
15480  * xmlReadFd:
15481  * @fd:  an open file descriptor
15482  * @URL:  the base URL to use for the document
15483  * @encoding:  the document encoding, or NULL
15484  * @options:  a combination of xmlParserOption
15485  *
15486  * parse an XML from a file descriptor and build a tree.
15487  * NOTE that the file descriptor will not be closed when the
15488  *      reader is closed or reset.
15489  *
15490  * Returns the resulting document tree
15491  */
15492 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15493 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15494 {
15495     xmlParserCtxtPtr ctxt;
15496     xmlParserInputBufferPtr input;
15497     xmlParserInputPtr stream;
15498 
15499     if (fd < 0)
15500         return (NULL);
15501     xmlInitParser();
15502 
15503     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15504     if (input == NULL)
15505         return (NULL);
15506     input->closecallback = NULL;
15507     ctxt = xmlNewParserCtxt();
15508     if (ctxt == NULL) {
15509         xmlFreeParserInputBuffer(input);
15510         return (NULL);
15511     }
15512     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15513     if (stream == NULL) {
15514         xmlFreeParserInputBuffer(input);
15515 	xmlFreeParserCtxt(ctxt);
15516         return (NULL);
15517     }
15518     inputPush(ctxt, stream);
15519     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15520 }
15521 
15522 /**
15523  * xmlReadIO:
15524  * @ioread:  an I/O read function
15525  * @ioclose:  an I/O close function
15526  * @ioctx:  an I/O handler
15527  * @URL:  the base URL to use for the document
15528  * @encoding:  the document encoding, or NULL
15529  * @options:  a combination of xmlParserOption
15530  *
15531  * parse an XML document from I/O functions and source and build a tree.
15532  *
15533  * Returns the resulting document tree
15534  */
15535 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15536 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15537           void *ioctx, const char *URL, const char *encoding, int options)
15538 {
15539     xmlParserCtxtPtr ctxt;
15540     xmlParserInputBufferPtr input;
15541     xmlParserInputPtr stream;
15542 
15543     if (ioread == NULL)
15544         return (NULL);
15545     xmlInitParser();
15546 
15547     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15548                                          XML_CHAR_ENCODING_NONE);
15549     if (input == NULL) {
15550         if (ioclose != NULL)
15551             ioclose(ioctx);
15552         return (NULL);
15553     }
15554     ctxt = xmlNewParserCtxt();
15555     if (ctxt == NULL) {
15556         xmlFreeParserInputBuffer(input);
15557         return (NULL);
15558     }
15559     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15560     if (stream == NULL) {
15561         xmlFreeParserInputBuffer(input);
15562 	xmlFreeParserCtxt(ctxt);
15563         return (NULL);
15564     }
15565     inputPush(ctxt, stream);
15566     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15567 }
15568 
15569 /**
15570  * xmlCtxtReadDoc:
15571  * @ctxt:  an XML parser context
15572  * @cur:  a pointer to a zero terminated string
15573  * @URL:  the base URL to use for the document
15574  * @encoding:  the document encoding, or NULL
15575  * @options:  a combination of xmlParserOption
15576  *
15577  * parse an XML in-memory document and build a tree.
15578  * This reuses the existing @ctxt parser context
15579  *
15580  * Returns the resulting document tree
15581  */
15582 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15583 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15584                const char *URL, const char *encoding, int options)
15585 {
15586     xmlParserInputPtr stream;
15587 
15588     if (cur == NULL)
15589         return (NULL);
15590     if (ctxt == NULL)
15591         return (NULL);
15592     xmlInitParser();
15593 
15594     xmlCtxtReset(ctxt);
15595 
15596     stream = xmlNewStringInputStream(ctxt, cur);
15597     if (stream == NULL) {
15598         return (NULL);
15599     }
15600     inputPush(ctxt, stream);
15601     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15602 }
15603 
15604 /**
15605  * xmlCtxtReadFile:
15606  * @ctxt:  an XML parser context
15607  * @filename:  a file or URL
15608  * @encoding:  the document encoding, or NULL
15609  * @options:  a combination of xmlParserOption
15610  *
15611  * parse an XML file from the filesystem or the network.
15612  * This reuses the existing @ctxt parser context
15613  *
15614  * Returns the resulting document tree
15615  */
15616 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15617 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15618                 const char *encoding, int options)
15619 {
15620     xmlParserInputPtr stream;
15621 
15622     if (filename == NULL)
15623         return (NULL);
15624     if (ctxt == NULL)
15625         return (NULL);
15626     xmlInitParser();
15627 
15628     xmlCtxtReset(ctxt);
15629 
15630     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15631     if (stream == NULL) {
15632         return (NULL);
15633     }
15634     inputPush(ctxt, stream);
15635     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15636 }
15637 
15638 /**
15639  * xmlCtxtReadMemory:
15640  * @ctxt:  an XML parser context
15641  * @buffer:  a pointer to a char array
15642  * @size:  the size of the array
15643  * @URL:  the base URL to use for the document
15644  * @encoding:  the document encoding, or NULL
15645  * @options:  a combination of xmlParserOption
15646  *
15647  * parse an XML in-memory document and build a tree.
15648  * This reuses the existing @ctxt parser context
15649  *
15650  * Returns the resulting document tree
15651  */
15652 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15653 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15654                   const char *URL, const char *encoding, int options)
15655 {
15656     xmlParserInputBufferPtr input;
15657     xmlParserInputPtr stream;
15658 
15659     if (ctxt == NULL)
15660         return (NULL);
15661     if (buffer == NULL)
15662         return (NULL);
15663     xmlInitParser();
15664 
15665     xmlCtxtReset(ctxt);
15666 
15667     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15668     if (input == NULL) {
15669 	return(NULL);
15670     }
15671 
15672     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15673     if (stream == NULL) {
15674 	xmlFreeParserInputBuffer(input);
15675 	return(NULL);
15676     }
15677 
15678     inputPush(ctxt, stream);
15679     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15680 }
15681 
15682 /**
15683  * xmlCtxtReadFd:
15684  * @ctxt:  an XML parser context
15685  * @fd:  an open file descriptor
15686  * @URL:  the base URL to use for the document
15687  * @encoding:  the document encoding, or NULL
15688  * @options:  a combination of xmlParserOption
15689  *
15690  * parse an XML from a file descriptor and build a tree.
15691  * This reuses the existing @ctxt parser context
15692  * NOTE that the file descriptor will not be closed when the
15693  *      reader is closed or reset.
15694  *
15695  * Returns the resulting document tree
15696  */
15697 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15698 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15699               const char *URL, const char *encoding, int options)
15700 {
15701     xmlParserInputBufferPtr input;
15702     xmlParserInputPtr stream;
15703 
15704     if (fd < 0)
15705         return (NULL);
15706     if (ctxt == NULL)
15707         return (NULL);
15708     xmlInitParser();
15709 
15710     xmlCtxtReset(ctxt);
15711 
15712 
15713     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15714     if (input == NULL)
15715         return (NULL);
15716     input->closecallback = NULL;
15717     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15718     if (stream == NULL) {
15719         xmlFreeParserInputBuffer(input);
15720         return (NULL);
15721     }
15722     inputPush(ctxt, stream);
15723     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15724 }
15725 
15726 /**
15727  * xmlCtxtReadIO:
15728  * @ctxt:  an XML parser context
15729  * @ioread:  an I/O read function
15730  * @ioclose:  an I/O close function
15731  * @ioctx:  an I/O handler
15732  * @URL:  the base URL to use for the document
15733  * @encoding:  the document encoding, or NULL
15734  * @options:  a combination of xmlParserOption
15735  *
15736  * parse an XML document from I/O functions and source and build a tree.
15737  * This reuses the existing @ctxt parser context
15738  *
15739  * Returns the resulting document tree
15740  */
15741 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15742 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15743               xmlInputCloseCallback ioclose, void *ioctx,
15744 	      const char *URL,
15745               const char *encoding, int options)
15746 {
15747     xmlParserInputBufferPtr input;
15748     xmlParserInputPtr stream;
15749 
15750     if (ioread == NULL)
15751         return (NULL);
15752     if (ctxt == NULL)
15753         return (NULL);
15754     xmlInitParser();
15755 
15756     xmlCtxtReset(ctxt);
15757 
15758     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15759                                          XML_CHAR_ENCODING_NONE);
15760     if (input == NULL) {
15761         if (ioclose != NULL)
15762             ioclose(ioctx);
15763         return (NULL);
15764     }
15765     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15766     if (stream == NULL) {
15767         xmlFreeParserInputBuffer(input);
15768         return (NULL);
15769     }
15770     inputPush(ctxt, stream);
15771     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15772 }
15773 
15774 #define bottom_parser
15775 #include "elfgcchack.h"
15776