1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c.
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
90 struct _xmlStartTag {
91 const xmlChar *prefix;
92 const xmlChar *URI;
93 int line;
94 int nsNr;
95 };
96
97 static void
98 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99
100 static xmlParserCtxtPtr
101 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
102 const xmlChar *base, xmlParserCtxtPtr pctx);
103
104 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105
106 static int
107 xmlParseElementStart(xmlParserCtxtPtr ctxt);
108
109 static void
110 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111
112 /************************************************************************
113 * *
114 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
115 * *
116 ************************************************************************/
117
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
 * entity replacement to the size in bytes of the input indicates that the
 * expansion shows exponential behaviour. A value of 10 corresponds to at
 * least 3 entity replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
128
129 /*
130 * xmlParserEntityCheck
131 *
132 * Function to check non-linear entity expansion behaviour
133 * This is here to detect and stop exponential linear entity expansion
134 * This is not a limitation of the parser but a safety
135 * boundary feature. It can be disabled with the XML_PARSE_HUGE
136 * parser option.
137 */
138 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)139 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
140 xmlEntityPtr ent, size_t replacement)
141 {
142 size_t consumed = 0;
143 int i;
144
145 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
146 return (0);
147 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
148 return (1);
149
150 /*
151 * This may look absurd but is needed to detect
152 * entities problems
153 */
154 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
155 (ent->content != NULL) && (ent->checked == 0) &&
156 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
157 unsigned long oldnbent = ctxt->nbentities, diff;
158 xmlChar *rep;
159
160 ent->checked = 1;
161
162 ++ctxt->depth;
163 rep = xmlStringDecodeEntities(ctxt, ent->content,
164 XML_SUBSTITUTE_REF, 0, 0, 0);
165 --ctxt->depth;
166 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
167 ent->content[0] = 0;
168 }
169
170 diff = ctxt->nbentities - oldnbent + 1;
171 if (diff > INT_MAX / 2)
172 diff = INT_MAX / 2;
173 ent->checked = diff * 2;
174 if (rep != NULL) {
175 if (xmlStrchr(rep, '<'))
176 ent->checked |= 1;
177 xmlFree(rep);
178 rep = NULL;
179 }
180 }
181
182 /*
183 * Prevent entity exponential check, not just replacement while
184 * parsing the DTD
185 * The check is potentially costly so do that only once in a thousand
186 */
187 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
188 (ctxt->nbentities % 1024 == 0)) {
189 for (i = 0;i < ctxt->inputNr;i++) {
190 consumed += ctxt->inputTab[i]->consumed +
191 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
192 }
193 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
194 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
195 ctxt->instate = XML_PARSER_EOF;
196 return (1);
197 }
198 consumed = 0;
199 }
200
201
202
203 if (replacement != 0) {
204 if (replacement < XML_MAX_TEXT_LENGTH)
205 return(0);
206
207 /*
208 * If the volume of entity copy reaches 10 times the
209 * amount of parsed data and over the large text threshold
210 * then that's very likely to be an abuse.
211 */
212 if (ctxt->input != NULL) {
213 consumed = ctxt->input->consumed +
214 (ctxt->input->cur - ctxt->input->base);
215 }
216 consumed += ctxt->sizeentities;
217
218 if (replacement < XML_PARSER_NON_LINEAR * consumed)
219 return(0);
220 } else if (size != 0) {
221 /*
222 * Do the check based on the replacement size of the entity
223 */
224 if (size < XML_PARSER_BIG_ENTITY)
225 return(0);
226
227 /*
228 * A limit on the amount of text data reasonably used
229 */
230 if (ctxt->input != NULL) {
231 consumed = ctxt->input->consumed +
232 (ctxt->input->cur - ctxt->input->base);
233 }
234 consumed += ctxt->sizeentities;
235
236 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
237 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
238 return (0);
239 } else if (ent != NULL) {
240 /*
241 * use the number of parsed entities in the replacement
242 */
243 size = ent->checked / 2;
244
245 /*
246 * The amount of data parsed counting entities size only once
247 */
248 if (ctxt->input != NULL) {
249 consumed = ctxt->input->consumed +
250 (ctxt->input->cur - ctxt->input->base);
251 }
252 consumed += ctxt->sizeentities;
253
254 /*
255 * Check the density of entities for the amount of data
256 * knowing an entity reference will take at least 3 bytes
257 */
258 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
259 return (0);
260 } else {
261 /*
262 * strange we got no data for checking
263 */
264 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
265 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
266 (ctxt->nbentities <= 10000))
267 return (0);
268 }
269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
270 return (1);
271 }
272
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
282
283
284
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
300
/*
 * NULL-terminated list of the XML-prefixed PIs allowed by W3C specs
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
310
311
312 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
313 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
314 const xmlChar **str);
315
316 static xmlParserErrors
317 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
318 xmlSAXHandlerPtr sax,
319 void *user_data, int depth, const xmlChar *URL,
320 const xmlChar *ID, xmlNodePtr *list);
321
322 static int
323 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
324 const char *encoding);
325 #ifdef LIBXML_LEGACY_ENABLED
326 static void
327 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
328 xmlNodePtr lastNode);
329 #endif /* LIBXML_LEGACY_ENABLED */
330
331 static xmlParserErrors
332 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
333 const xmlChar *string, void *user_data, xmlNodePtr *lst);
334
335 static int
336 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
337
338 /************************************************************************
339 * *
340 * Some factorized error routines *
341 * *
342 ************************************************************************/
343
344 /**
345 * xmlErrAttributeDup:
346 * @ctxt: an XML parser context
347 * @prefix: the attribute prefix
348 * @localname: the attribute localname
349 *
350 * Handle a redefinition of attribute error
351 */
352 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)353 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
354 const xmlChar * localname)
355 {
356 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
357 (ctxt->instate == XML_PARSER_EOF))
358 return;
359 if (ctxt != NULL)
360 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
361
362 if (prefix == NULL)
363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
364 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
365 (const char *) localname, NULL, NULL, 0, 0,
366 "Attribute %s redefined\n", localname);
367 else
368 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
369 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
370 (const char *) prefix, (const char *) localname,
371 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
372 localname);
373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
378 }
379
380 /**
381 * xmlFatalErr:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @extra: extra information string
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)389 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
390 {
391 const char *errmsg;
392
393 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
394 (ctxt->instate == XML_PARSER_EOF))
395 return;
396 switch (error) {
397 case XML_ERR_INVALID_HEX_CHARREF:
398 errmsg = "CharRef: invalid hexadecimal value";
399 break;
400 case XML_ERR_INVALID_DEC_CHARREF:
401 errmsg = "CharRef: invalid decimal value";
402 break;
403 case XML_ERR_INVALID_CHARREF:
404 errmsg = "CharRef: invalid value";
405 break;
406 case XML_ERR_INTERNAL_ERROR:
407 errmsg = "internal error";
408 break;
409 case XML_ERR_PEREF_AT_EOF:
410 errmsg = "PEReference at end of document";
411 break;
412 case XML_ERR_PEREF_IN_PROLOG:
413 errmsg = "PEReference in prolog";
414 break;
415 case XML_ERR_PEREF_IN_EPILOG:
416 errmsg = "PEReference in epilog";
417 break;
418 case XML_ERR_PEREF_NO_NAME:
419 errmsg = "PEReference: no name";
420 break;
421 case XML_ERR_PEREF_SEMICOL_MISSING:
422 errmsg = "PEReference: expecting ';'";
423 break;
424 case XML_ERR_ENTITY_LOOP:
425 errmsg = "Detected an entity reference loop";
426 break;
427 case XML_ERR_ENTITY_NOT_STARTED:
428 errmsg = "EntityValue: \" or ' expected";
429 break;
430 case XML_ERR_ENTITY_PE_INTERNAL:
431 errmsg = "PEReferences forbidden in internal subset";
432 break;
433 case XML_ERR_ENTITY_NOT_FINISHED:
434 errmsg = "EntityValue: \" or ' expected";
435 break;
436 case XML_ERR_ATTRIBUTE_NOT_STARTED:
437 errmsg = "AttValue: \" or ' expected";
438 break;
439 case XML_ERR_LT_IN_ATTRIBUTE:
440 errmsg = "Unescaped '<' not allowed in attributes values";
441 break;
442 case XML_ERR_LITERAL_NOT_STARTED:
443 errmsg = "SystemLiteral \" or ' expected";
444 break;
445 case XML_ERR_LITERAL_NOT_FINISHED:
446 errmsg = "Unfinished System or Public ID \" or ' expected";
447 break;
448 case XML_ERR_MISPLACED_CDATA_END:
449 errmsg = "Sequence ']]>' not allowed in content";
450 break;
451 case XML_ERR_URI_REQUIRED:
452 errmsg = "SYSTEM or PUBLIC, the URI is missing";
453 break;
454 case XML_ERR_PUBID_REQUIRED:
455 errmsg = "PUBLIC, the Public Identifier is missing";
456 break;
457 case XML_ERR_HYPHEN_IN_COMMENT:
458 errmsg = "Comment must not contain '--' (double-hyphen)";
459 break;
460 case XML_ERR_PI_NOT_STARTED:
461 errmsg = "xmlParsePI : no target name";
462 break;
463 case XML_ERR_RESERVED_XML_NAME:
464 errmsg = "Invalid PI name";
465 break;
466 case XML_ERR_NOTATION_NOT_STARTED:
467 errmsg = "NOTATION: Name expected here";
468 break;
469 case XML_ERR_NOTATION_NOT_FINISHED:
470 errmsg = "'>' required to close NOTATION declaration";
471 break;
472 case XML_ERR_VALUE_REQUIRED:
473 errmsg = "Entity value required";
474 break;
475 case XML_ERR_URI_FRAGMENT:
476 errmsg = "Fragment not allowed";
477 break;
478 case XML_ERR_ATTLIST_NOT_STARTED:
479 errmsg = "'(' required to start ATTLIST enumeration";
480 break;
481 case XML_ERR_NMTOKEN_REQUIRED:
482 errmsg = "NmToken expected in ATTLIST enumeration";
483 break;
484 case XML_ERR_ATTLIST_NOT_FINISHED:
485 errmsg = "')' required to finish ATTLIST enumeration";
486 break;
487 case XML_ERR_MIXED_NOT_STARTED:
488 errmsg = "MixedContentDecl : '|' or ')*' expected";
489 break;
490 case XML_ERR_PCDATA_REQUIRED:
491 errmsg = "MixedContentDecl : '#PCDATA' expected";
492 break;
493 case XML_ERR_ELEMCONTENT_NOT_STARTED:
494 errmsg = "ContentDecl : Name or '(' expected";
495 break;
496 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
497 errmsg = "ContentDecl : ',' '|' or ')' expected";
498 break;
499 case XML_ERR_PEREF_IN_INT_SUBSET:
500 errmsg =
501 "PEReference: forbidden within markup decl in internal subset";
502 break;
503 case XML_ERR_GT_REQUIRED:
504 errmsg = "expected '>'";
505 break;
506 case XML_ERR_CONDSEC_INVALID:
507 errmsg = "XML conditional section '[' expected";
508 break;
509 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
510 errmsg = "Content error in the external subset";
511 break;
512 case XML_ERR_CONDSEC_INVALID_KEYWORD:
513 errmsg =
514 "conditional section INCLUDE or IGNORE keyword expected";
515 break;
516 case XML_ERR_CONDSEC_NOT_FINISHED:
517 errmsg = "XML conditional section not closed";
518 break;
519 case XML_ERR_XMLDECL_NOT_STARTED:
520 errmsg = "Text declaration '<?xml' required";
521 break;
522 case XML_ERR_XMLDECL_NOT_FINISHED:
523 errmsg = "parsing XML declaration: '?>' expected";
524 break;
525 case XML_ERR_EXT_ENTITY_STANDALONE:
526 errmsg = "external parsed entities cannot be standalone";
527 break;
528 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
529 errmsg = "EntityRef: expecting ';'";
530 break;
531 case XML_ERR_DOCTYPE_NOT_FINISHED:
532 errmsg = "DOCTYPE improperly terminated";
533 break;
534 case XML_ERR_LTSLASH_REQUIRED:
535 errmsg = "EndTag: '</' not found";
536 break;
537 case XML_ERR_EQUAL_REQUIRED:
538 errmsg = "expected '='";
539 break;
540 case XML_ERR_STRING_NOT_CLOSED:
541 errmsg = "String not closed expecting \" or '";
542 break;
543 case XML_ERR_STRING_NOT_STARTED:
544 errmsg = "String not started expecting ' or \"";
545 break;
546 case XML_ERR_ENCODING_NAME:
547 errmsg = "Invalid XML encoding name";
548 break;
549 case XML_ERR_STANDALONE_VALUE:
550 errmsg = "standalone accepts only 'yes' or 'no'";
551 break;
552 case XML_ERR_DOCUMENT_EMPTY:
553 errmsg = "Document is empty";
554 break;
555 case XML_ERR_DOCUMENT_END:
556 errmsg = "Extra content at the end of the document";
557 break;
558 case XML_ERR_NOT_WELL_BALANCED:
559 errmsg = "chunk is not well balanced";
560 break;
561 case XML_ERR_EXTRA_CONTENT:
562 errmsg = "extra content at the end of well balanced chunk";
563 break;
564 case XML_ERR_VERSION_MISSING:
565 errmsg = "Malformed declaration expecting version";
566 break;
567 case XML_ERR_NAME_TOO_LONG:
568 errmsg = "Name too long use XML_PARSE_HUGE option";
569 break;
570 #if 0
571 case:
572 errmsg = "";
573 break;
574 #endif
575 default:
576 errmsg = "Unregistered error message";
577 }
578 if (ctxt != NULL)
579 ctxt->errNo = error;
580 if (info == NULL) {
581 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
582 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
583 errmsg);
584 } else {
585 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
586 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
587 errmsg, info);
588 }
589 if (ctxt != NULL) {
590 ctxt->wellFormed = 0;
591 if (ctxt->recovery == 0)
592 ctxt->disableSAX = 1;
593 }
594 }
595
596 /**
597 * xmlFatalErrMsg:
598 * @ctxt: an XML parser context
599 * @error: the error number
600 * @msg: the error message
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
604 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)605 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg)
607 {
608 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609 (ctxt->instate == XML_PARSER_EOF))
610 return;
611 if (ctxt != NULL)
612 ctxt->errNo = error;
613 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
614 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
615 if (ctxt != NULL) {
616 ctxt->wellFormed = 0;
617 if (ctxt->recovery == 0)
618 ctxt->disableSAX = 1;
619 }
620 }
621
622 /**
623 * xmlWarningMsg:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 * @str2: extra data
629 *
630 * Handle a warning.
631 */
632 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)633 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
634 const char *msg, const xmlChar *str1, const xmlChar *str2)
635 {
636 xmlStructuredErrorFunc schannel = NULL;
637
638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
641 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
642 (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 if (ctxt != NULL) {
645 __xmlRaiseError(schannel,
646 (ctxt->sax) ? ctxt->sax->warning : NULL,
647 ctxt->userData,
648 ctxt, NULL, XML_FROM_PARSER, error,
649 XML_ERR_WARNING, NULL, 0,
650 (const char *) str1, (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 } else {
653 __xmlRaiseError(schannel, NULL, NULL,
654 ctxt, NULL, XML_FROM_PARSER, error,
655 XML_ERR_WARNING, NULL, 0,
656 (const char *) str1, (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
658 }
659 }
660
661 /**
662 * xmlValidityError:
663 * @ctxt: an XML parser context
664 * @error: the error number
665 * @msg: the error message
666 * @str1: extra data
667 *
668 * Handle a validity error.
669 */
670 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)671 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
672 const char *msg, const xmlChar *str1, const xmlChar *str2)
673 {
674 xmlStructuredErrorFunc schannel = NULL;
675
676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
679 if (ctxt != NULL) {
680 ctxt->errNo = error;
681 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
682 schannel = ctxt->sax->serror;
683 }
684 if (ctxt != NULL) {
685 __xmlRaiseError(schannel,
686 ctxt->vctxt.error, ctxt->vctxt.userData,
687 ctxt, NULL, XML_FROM_DTD, error,
688 XML_ERR_ERROR, NULL, 0, (const char *) str1,
689 (const char *) str2, NULL, 0, 0,
690 msg, (const char *) str1, (const char *) str2);
691 ctxt->valid = 0;
692 } else {
693 __xmlRaiseError(schannel, NULL, NULL,
694 ctxt, NULL, XML_FROM_DTD, error,
695 XML_ERR_ERROR, NULL, 0, (const char *) str1,
696 (const char *) str2, NULL, 0, 0,
697 msg, (const char *) str1, (const char *) str2);
698 }
699 }
700
701 /**
702 * xmlFatalErrMsgInt:
703 * @ctxt: an XML parser context
704 * @error: the error number
705 * @msg: the error message
706 * @val: an integer value
707 *
708 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
709 */
710 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)711 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
712 const char *msg, int val)
713 {
714 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
715 (ctxt->instate == XML_PARSER_EOF))
716 return;
717 if (ctxt != NULL)
718 ctxt->errNo = error;
719 __xmlRaiseError(NULL, NULL, NULL,
720 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
721 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
722 if (ctxt != NULL) {
723 ctxt->wellFormed = 0;
724 if (ctxt->recovery == 0)
725 ctxt->disableSAX = 1;
726 }
727 }
728
729 /**
730 * xmlFatalErrMsgStrIntStr:
731 * @ctxt: an XML parser context
732 * @error: the error number
733 * @msg: the error message
734 * @str1: an string info
735 * @val: an integer value
736 * @str2: an string info
737 *
738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739 */
740 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)741 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742 const char *msg, const xmlChar *str1, int val,
743 const xmlChar *str2)
744 {
745 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
746 (ctxt->instate == XML_PARSER_EOF))
747 return;
748 if (ctxt != NULL)
749 ctxt->errNo = error;
750 __xmlRaiseError(NULL, NULL, NULL,
751 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
752 NULL, 0, (const char *) str1, (const char *) str2,
753 NULL, val, 0, msg, str1, val, str2);
754 if (ctxt != NULL) {
755 ctxt->wellFormed = 0;
756 if (ctxt->recovery == 0)
757 ctxt->disableSAX = 1;
758 }
759 }
760
761 /**
762 * xmlFatalErrMsgStr:
763 * @ctxt: an XML parser context
764 * @error: the error number
765 * @msg: the error message
766 * @val: a string value
767 *
768 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
769 */
770 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)771 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
772 const char *msg, const xmlChar * val)
773 {
774 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
775 (ctxt->instate == XML_PARSER_EOF))
776 return;
777 if (ctxt != NULL)
778 ctxt->errNo = error;
779 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
780 XML_FROM_PARSER, error, XML_ERR_FATAL,
781 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
782 val);
783 if (ctxt != NULL) {
784 ctxt->wellFormed = 0;
785 if (ctxt->recovery == 0)
786 ctxt->disableSAX = 1;
787 }
788 }
789
790 /**
791 * xmlErrMsgStr:
792 * @ctxt: an XML parser context
793 * @error: the error number
794 * @msg: the error message
795 * @val: a string value
796 *
797 * Handle a non fatal parser error
798 */
799 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)800 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
801 const char *msg, const xmlChar * val)
802 {
803 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
804 (ctxt->instate == XML_PARSER_EOF))
805 return;
806 if (ctxt != NULL)
807 ctxt->errNo = error;
808 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
809 XML_FROM_PARSER, error, XML_ERR_ERROR,
810 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
811 val);
812 }
813
814 /**
815 * xmlNsErr:
816 * @ctxt: an XML parser context
817 * @error: the error number
818 * @msg: the message
819 * @info1: extra information string
820 * @info2: extra information string
821 *
822 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
823 */
824 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)825 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
826 const char *msg,
827 const xmlChar * info1, const xmlChar * info2,
828 const xmlChar * info3)
829 {
830 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
831 (ctxt->instate == XML_PARSER_EOF))
832 return;
833 if (ctxt != NULL)
834 ctxt->errNo = error;
835 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
836 XML_ERR_ERROR, NULL, 0, (const char *) info1,
837 (const char *) info2, (const char *) info3, 0, 0, msg,
838 info1, info2, info3);
839 if (ctxt != NULL)
840 ctxt->nsWellFormed = 0;
841 }
842
843 /**
844 * xmlNsWarn
845 * @ctxt: an XML parser context
846 * @error: the error number
847 * @msg: the message
848 * @info1: extra information string
849 * @info2: extra information string
850 *
851 * Handle a namespace warning error
852 */
853 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)854 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
855 const char *msg,
856 const xmlChar * info1, const xmlChar * info2,
857 const xmlChar * info3)
858 {
859 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
860 (ctxt->instate == XML_PARSER_EOF))
861 return;
862 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
863 XML_ERR_WARNING, NULL, 0, (const char *) info1,
864 (const char *) info2, (const char *) info3, 0, 0, msg,
865 info1, info2, info3);
866 }
867
868 /************************************************************************
869 * *
870 * Library wide options *
871 * *
872 ************************************************************************/
873
874 /**
875 * xmlHasFeature:
876 * @feature: the feature to be examined
877 *
878 * Examines if the library has been compiled with a given feature.
879 *
880 * Returns a non-zero value if the feature exist, otherwise zero.
881 * Returns zero (0) if the feature does not exist or an unknown
882 * unknown feature is requested, non-zero otherwise.
883 */
884 int
xmlHasFeature(xmlFeature feature)885 xmlHasFeature(xmlFeature feature)
886 {
887 switch (feature) {
888 case XML_WITH_THREAD:
889 #ifdef LIBXML_THREAD_ENABLED
890 return(1);
891 #else
892 return(0);
893 #endif
894 case XML_WITH_TREE:
895 #ifdef LIBXML_TREE_ENABLED
896 return(1);
897 #else
898 return(0);
899 #endif
900 case XML_WITH_OUTPUT:
901 #ifdef LIBXML_OUTPUT_ENABLED
902 return(1);
903 #else
904 return(0);
905 #endif
906 case XML_WITH_PUSH:
907 #ifdef LIBXML_PUSH_ENABLED
908 return(1);
909 #else
910 return(0);
911 #endif
912 case XML_WITH_READER:
913 #ifdef LIBXML_READER_ENABLED
914 return(1);
915 #else
916 return(0);
917 #endif
918 case XML_WITH_PATTERN:
919 #ifdef LIBXML_PATTERN_ENABLED
920 return(1);
921 #else
922 return(0);
923 #endif
924 case XML_WITH_WRITER:
925 #ifdef LIBXML_WRITER_ENABLED
926 return(1);
927 #else
928 return(0);
929 #endif
930 case XML_WITH_SAX1:
931 #ifdef LIBXML_SAX1_ENABLED
932 return(1);
933 #else
934 return(0);
935 #endif
936 case XML_WITH_FTP:
937 #ifdef LIBXML_FTP_ENABLED
938 return(1);
939 #else
940 return(0);
941 #endif
942 case XML_WITH_HTTP:
943 #ifdef LIBXML_HTTP_ENABLED
944 return(1);
945 #else
946 return(0);
947 #endif
948 case XML_WITH_VALID:
949 #ifdef LIBXML_VALID_ENABLED
950 return(1);
951 #else
952 return(0);
953 #endif
954 case XML_WITH_HTML:
955 #ifdef LIBXML_HTML_ENABLED
956 return(1);
957 #else
958 return(0);
959 #endif
960 case XML_WITH_LEGACY:
961 #ifdef LIBXML_LEGACY_ENABLED
962 return(1);
963 #else
964 return(0);
965 #endif
966 case XML_WITH_C14N:
967 #ifdef LIBXML_C14N_ENABLED
968 return(1);
969 #else
970 return(0);
971 #endif
972 case XML_WITH_CATALOG:
973 #ifdef LIBXML_CATALOG_ENABLED
974 return(1);
975 #else
976 return(0);
977 #endif
978 case XML_WITH_XPATH:
979 #ifdef LIBXML_XPATH_ENABLED
980 return(1);
981 #else
982 return(0);
983 #endif
984 case XML_WITH_XPTR:
985 #ifdef LIBXML_XPTR_ENABLED
986 return(1);
987 #else
988 return(0);
989 #endif
990 case XML_WITH_XINCLUDE:
991 #ifdef LIBXML_XINCLUDE_ENABLED
992 return(1);
993 #else
994 return(0);
995 #endif
996 case XML_WITH_ICONV:
997 #ifdef LIBXML_ICONV_ENABLED
998 return(1);
999 #else
1000 return(0);
1001 #endif
1002 case XML_WITH_ISO8859X:
1003 #ifdef LIBXML_ISO8859X_ENABLED
1004 return(1);
1005 #else
1006 return(0);
1007 #endif
1008 case XML_WITH_UNICODE:
1009 #ifdef LIBXML_UNICODE_ENABLED
1010 return(1);
1011 #else
1012 return(0);
1013 #endif
1014 case XML_WITH_REGEXP:
1015 #ifdef LIBXML_REGEXP_ENABLED
1016 return(1);
1017 #else
1018 return(0);
1019 #endif
1020 case XML_WITH_AUTOMATA:
1021 #ifdef LIBXML_AUTOMATA_ENABLED
1022 return(1);
1023 #else
1024 return(0);
1025 #endif
1026 case XML_WITH_EXPR:
1027 #ifdef LIBXML_EXPR_ENABLED
1028 return(1);
1029 #else
1030 return(0);
1031 #endif
1032 case XML_WITH_SCHEMAS:
1033 #ifdef LIBXML_SCHEMAS_ENABLED
1034 return(1);
1035 #else
1036 return(0);
1037 #endif
1038 case XML_WITH_SCHEMATRON:
1039 #ifdef LIBXML_SCHEMATRON_ENABLED
1040 return(1);
1041 #else
1042 return(0);
1043 #endif
1044 case XML_WITH_MODULES:
1045 #ifdef LIBXML_MODULES_ENABLED
1046 return(1);
1047 #else
1048 return(0);
1049 #endif
1050 case XML_WITH_DEBUG:
1051 #ifdef LIBXML_DEBUG_ENABLED
1052 return(1);
1053 #else
1054 return(0);
1055 #endif
1056 case XML_WITH_DEBUG_MEM:
1057 #ifdef DEBUG_MEMORY_LOCATION
1058 return(1);
1059 #else
1060 return(0);
1061 #endif
1062 case XML_WITH_DEBUG_RUN:
1063 #ifdef LIBXML_DEBUG_RUNTIME
1064 return(1);
1065 #else
1066 return(0);
1067 #endif
1068 case XML_WITH_ZLIB:
1069 #ifdef LIBXML_ZLIB_ENABLED
1070 return(1);
1071 #else
1072 return(0);
1073 #endif
1074 case XML_WITH_LZMA:
1075 #ifdef LIBXML_LZMA_ENABLED
1076 return(1);
1077 #else
1078 return(0);
1079 #endif
1080 case XML_WITH_ICU:
1081 #ifdef LIBXML_ICU_ENABLED
1082 return(1);
1083 #else
1084 return(0);
1085 #endif
1086 default:
1087 break;
1088 }
1089 return(0);
1090 }
1091
1092 /************************************************************************
1093 * *
1094 * SAX2 defaulted attributes handling *
1095 * *
1096 ************************************************************************/
1097
1098 /**
1099 * xmlDetectSAX2:
1100 * @ctxt: an XML parser context
1101 *
1102 * Do the SAX2 detection and specific initialization
1103 */
1104 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1105 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1106 xmlSAXHandlerPtr sax;
1107 if (ctxt == NULL) return;
1108 sax = ctxt->sax;
1109 #ifdef LIBXML_SAX1_ENABLED
1110 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1111 ((sax->startElementNs != NULL) ||
1112 (sax->endElementNs != NULL) ||
1113 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1114 ctxt->sax2 = 1;
1115 #else
1116 ctxt->sax2 = 1;
1117 #endif /* LIBXML_SAX1_ENABLED */
1118
1119 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1120 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1121 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1122 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1123 (ctxt->str_xml_ns == NULL)) {
1124 xmlErrMemory(ctxt, NULL);
1125 }
1126 }
1127
1128 typedef struct _xmlDefAttrs xmlDefAttrs;
1129 typedef xmlDefAttrs *xmlDefAttrsPtr;
1130 struct _xmlDefAttrs {
1131 int nbAttrs; /* number of defaulted attributes on that element */
1132 int maxAttrs; /* the size of the array */
1133 #if __STDC_VERSION__ >= 199901L
1134 /* Using a C99 flexible array member avoids UBSan errors. */
1135 const xmlChar *values[]; /* array of localname/prefix/values/external */
1136 #else
1137 const xmlChar *values[5];
1138 #endif
1139 };
1140
1141 /**
1142 * xmlAttrNormalizeSpace:
1143 * @src: the source string
1144 * @dst: the target string
1145 *
1146 * Normalize the space in non CDATA attribute values:
1147 * If the attribute type is not CDATA, then the XML processor MUST further
1148 * process the normalized attribute value by discarding any leading and
1149 * trailing space (#x20) characters, and by replacing sequences of space
1150 * (#x20) characters by a single space (#x20) character.
1151 * Note that the size of dst need to be at least src, and if one doesn't need
1152 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1153 * passing src as dst is just fine.
1154 *
1155 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1156 * is needed.
1157 */
1158 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1159 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1160 {
1161 if ((src == NULL) || (dst == NULL))
1162 return(NULL);
1163
1164 while (*src == 0x20) src++;
1165 while (*src != 0) {
1166 if (*src == 0x20) {
1167 while (*src == 0x20) src++;
1168 if (*src != 0)
1169 *dst++ = 0x20;
1170 } else {
1171 *dst++ = *src++;
1172 }
1173 }
1174 *dst = 0;
1175 if (dst == src)
1176 return(NULL);
1177 return(dst);
1178 }
1179
1180 /**
1181 * xmlAttrNormalizeSpace2:
1182 * @src: the source string
1183 *
1184 * Normalize the space in non CDATA attribute values, a slightly more complex
1185 * front end to avoid allocation problems when running on attribute values
1186 * coming from the input.
1187 *
1188 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1189 * is needed.
1190 */
1191 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1192 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1193 {
1194 int i;
1195 int remove_head = 0;
1196 int need_realloc = 0;
1197 const xmlChar *cur;
1198
1199 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1200 return(NULL);
1201 i = *len;
1202 if (i <= 0)
1203 return(NULL);
1204
1205 cur = src;
1206 while (*cur == 0x20) {
1207 cur++;
1208 remove_head++;
1209 }
1210 while (*cur != 0) {
1211 if (*cur == 0x20) {
1212 cur++;
1213 if ((*cur == 0x20) || (*cur == 0)) {
1214 need_realloc = 1;
1215 break;
1216 }
1217 } else
1218 cur++;
1219 }
1220 if (need_realloc) {
1221 xmlChar *ret;
1222
1223 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1224 if (ret == NULL) {
1225 xmlErrMemory(ctxt, NULL);
1226 return(NULL);
1227 }
1228 xmlAttrNormalizeSpace(ret, ret);
1229 *len = (int) strlen((const char *)ret);
1230 return(ret);
1231 } else if (remove_head) {
1232 *len -= remove_head;
1233 memmove(src, src + remove_head, 1 + *len);
1234 return(src);
1235 }
1236 return(NULL);
1237 }
1238
1239 /**
1240 * xmlAddDefAttrs:
1241 * @ctxt: an XML parser context
1242 * @fullname: the element fullname
1243 * @fullattr: the attribute fullname
1244 * @value: the attribute value
1245 *
1246 * Add a defaulted attribute for an element
1247 */
1248 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1249 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1250 const xmlChar *fullname,
1251 const xmlChar *fullattr,
1252 const xmlChar *value) {
1253 xmlDefAttrsPtr defaults;
1254 int len;
1255 const xmlChar *name;
1256 const xmlChar *prefix;
1257
1258 /*
1259 * Allows to detect attribute redefinitions
1260 */
1261 if (ctxt->attsSpecial != NULL) {
1262 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1263 return;
1264 }
1265
1266 if (ctxt->attsDefault == NULL) {
1267 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1268 if (ctxt->attsDefault == NULL)
1269 goto mem_error;
1270 }
1271
1272 /*
1273 * split the element name into prefix:localname , the string found
1274 * are within the DTD and then not associated to namespace names.
1275 */
1276 name = xmlSplitQName3(fullname, &len);
1277 if (name == NULL) {
1278 name = xmlDictLookup(ctxt->dict, fullname, -1);
1279 prefix = NULL;
1280 } else {
1281 name = xmlDictLookup(ctxt->dict, name, -1);
1282 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1283 }
1284
1285 /*
1286 * make sure there is some storage
1287 */
1288 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1289 if (defaults == NULL) {
1290 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1291 (4 * 5) * sizeof(const xmlChar *));
1292 if (defaults == NULL)
1293 goto mem_error;
1294 defaults->nbAttrs = 0;
1295 defaults->maxAttrs = 4;
1296 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1297 defaults, NULL) < 0) {
1298 xmlFree(defaults);
1299 goto mem_error;
1300 }
1301 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1302 xmlDefAttrsPtr temp;
1303
1304 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1305 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1306 if (temp == NULL)
1307 goto mem_error;
1308 defaults = temp;
1309 defaults->maxAttrs *= 2;
1310 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1311 defaults, NULL) < 0) {
1312 xmlFree(defaults);
1313 goto mem_error;
1314 }
1315 }
1316
1317 /*
1318 * Split the element name into prefix:localname , the string found
1319 * are within the DTD and hen not associated to namespace names.
1320 */
1321 name = xmlSplitQName3(fullattr, &len);
1322 if (name == NULL) {
1323 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1324 prefix = NULL;
1325 } else {
1326 name = xmlDictLookup(ctxt->dict, name, -1);
1327 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1328 }
1329
1330 defaults->values[5 * defaults->nbAttrs] = name;
1331 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1332 /* intern the string and precompute the end */
1333 len = xmlStrlen(value);
1334 value = xmlDictLookup(ctxt->dict, value, len);
1335 defaults->values[5 * defaults->nbAttrs + 2] = value;
1336 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1337 if (ctxt->external)
1338 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1339 else
1340 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1341 defaults->nbAttrs++;
1342
1343 return;
1344
1345 mem_error:
1346 xmlErrMemory(ctxt, NULL);
1347 return;
1348 }
1349
1350 /**
1351 * xmlAddSpecialAttr:
1352 * @ctxt: an XML parser context
1353 * @fullname: the element fullname
1354 * @fullattr: the attribute fullname
1355 * @type: the attribute type
1356 *
1357 * Register this attribute type
1358 */
1359 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1360 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1361 const xmlChar *fullname,
1362 const xmlChar *fullattr,
1363 int type)
1364 {
1365 if (ctxt->attsSpecial == NULL) {
1366 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1367 if (ctxt->attsSpecial == NULL)
1368 goto mem_error;
1369 }
1370
1371 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1372 return;
1373
1374 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1375 (void *) (ptrdiff_t) type);
1376 return;
1377
1378 mem_error:
1379 xmlErrMemory(ctxt, NULL);
1380 return;
1381 }
1382
1383 /**
1384 * xmlCleanSpecialAttrCallback:
1385 *
1386 * Removes CDATA attributes from the special attribute table
1387 */
1388 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1389 xmlCleanSpecialAttrCallback(void *payload, void *data,
1390 const xmlChar *fullname, const xmlChar *fullattr,
1391 const xmlChar *unused ATTRIBUTE_UNUSED) {
1392 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1393
1394 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1395 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1396 }
1397 }
1398
1399 /**
1400 * xmlCleanSpecialAttr:
1401 * @ctxt: an XML parser context
1402 *
1403 * Trim the list of attributes defined to remove all those of type
1404 * CDATA as they are not special. This call should be done when finishing
1405 * to parse the DTD and before starting to parse the document root.
1406 */
1407 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1408 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1409 {
1410 if (ctxt->attsSpecial == NULL)
1411 return;
1412
1413 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1414
1415 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1416 xmlHashFree(ctxt->attsSpecial, NULL);
1417 ctxt->attsSpecial = NULL;
1418 }
1419 return;
1420 }
1421
1422 /**
1423 * xmlCheckLanguageID:
1424 * @lang: pointer to the string value
1425 *
1426 * Checks that the value conforms to the LanguageID production:
1427 *
1428 * NOTE: this is somewhat deprecated, those productions were removed from
1429 * the XML Second edition.
1430 *
1431 * [33] LanguageID ::= Langcode ('-' Subcode)*
1432 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1433 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1434 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1435 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1436 * [38] Subcode ::= ([a-z] | [A-Z])+
1437 *
1438 * The current REC reference the successors of RFC 1766, currently 5646
1439 *
1440 * http://www.rfc-editor.org/rfc/rfc5646.txt
1441 * langtag = language
1442 * ["-" script]
1443 * ["-" region]
1444 * *("-" variant)
1445 * *("-" extension)
1446 * ["-" privateuse]
1447 * language = 2*3ALPHA ; shortest ISO 639 code
1448 * ["-" extlang] ; sometimes followed by
1449 * ; extended language subtags
1450 * / 4ALPHA ; or reserved for future use
1451 * / 5*8ALPHA ; or registered language subtag
1452 *
1453 * extlang = 3ALPHA ; selected ISO 639 codes
1454 * *2("-" 3ALPHA) ; permanently reserved
1455 *
1456 * script = 4ALPHA ; ISO 15924 code
1457 *
1458 * region = 2ALPHA ; ISO 3166-1 code
1459 * / 3DIGIT ; UN M.49 code
1460 *
1461 * variant = 5*8alphanum ; registered variants
1462 * / (DIGIT 3alphanum)
1463 *
1464 * extension = singleton 1*("-" (2*8alphanum))
1465 *
1466 * ; Single alphanumerics
1467 * ; "x" reserved for private use
1468 * singleton = DIGIT ; 0 - 9
1469 * / %x41-57 ; A - W
1470 * / %x59-5A ; Y - Z
1471 * / %x61-77 ; a - w
1472 * / %x79-7A ; y - z
1473 *
1474 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1475 * The parser below doesn't try to cope with extension or privateuse
1476 * that could be added but that's not interoperable anyway
1477 *
1478 * Returns 1 if correct 0 otherwise
1479 **/
1480 int
xmlCheckLanguageID(const xmlChar * lang)1481 xmlCheckLanguageID(const xmlChar * lang)
1482 {
1483 const xmlChar *cur = lang, *nxt;
1484
1485 if (cur == NULL)
1486 return (0);
1487 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1488 ((cur[0] == 'I') && (cur[1] == '-')) ||
1489 ((cur[0] == 'x') && (cur[1] == '-')) ||
1490 ((cur[0] == 'X') && (cur[1] == '-'))) {
1491 /*
1492 * Still allow IANA code and user code which were coming
1493 * from the previous version of the XML-1.0 specification
1494 * it's deprecated but we should not fail
1495 */
1496 cur += 2;
1497 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1498 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1499 cur++;
1500 return(cur[0] == 0);
1501 }
1502 nxt = cur;
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505 nxt++;
1506 if (nxt - cur >= 4) {
1507 /*
1508 * Reserved
1509 */
1510 if ((nxt - cur > 8) || (nxt[0] != 0))
1511 return(0);
1512 return(1);
1513 }
1514 if (nxt - cur < 2)
1515 return(0);
1516 /* we got an ISO 639 code */
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521
1522 nxt++;
1523 cur = nxt;
1524 /* now we can have extlang or script or region or variant */
1525 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526 goto region_m49;
1527
1528 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530 nxt++;
1531 if (nxt - cur == 4)
1532 goto script;
1533 if (nxt - cur == 2)
1534 goto region;
1535 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1536 goto variant;
1537 if (nxt - cur != 3)
1538 return(0);
1539 /* we parsed an extlang */
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can have script or region or variant */
1548 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1549 goto region_m49;
1550
1551 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1552 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1553 nxt++;
1554 if (nxt - cur == 2)
1555 goto region;
1556 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1557 goto variant;
1558 if (nxt - cur != 4)
1559 return(0);
1560 /* we parsed a script */
1561 script:
1562 if (nxt[0] == 0)
1563 return(1);
1564 if (nxt[0] != '-')
1565 return(0);
1566
1567 nxt++;
1568 cur = nxt;
1569 /* now we can have region or variant */
1570 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1571 goto region_m49;
1572
1573 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1574 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1575 nxt++;
1576
1577 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1578 goto variant;
1579 if (nxt - cur != 2)
1580 return(0);
1581 /* we parsed a region */
1582 region:
1583 if (nxt[0] == 0)
1584 return(1);
1585 if (nxt[0] != '-')
1586 return(0);
1587
1588 nxt++;
1589 cur = nxt;
1590 /* now we can just have a variant */
1591 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1592 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1593 nxt++;
1594
1595 if ((nxt - cur < 5) || (nxt - cur > 8))
1596 return(0);
1597
1598 /* we parsed a variant */
1599 variant:
1600 if (nxt[0] == 0)
1601 return(1);
1602 if (nxt[0] != '-')
1603 return(0);
1604 /* extensions and private use subtags not checked */
1605 return (1);
1606
1607 region_m49:
1608 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1609 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1610 nxt += 3;
1611 goto region;
1612 }
1613 return(0);
1614 }
1615
1616 /************************************************************************
1617 * *
1618 * Parser stacks related functions and macros *
1619 * *
1620 ************************************************************************/
1621
1622 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1623 const xmlChar ** str);
1624
1625 #ifdef SAX2
1626 /**
1627 * nsPush:
1628 * @ctxt: an XML parser context
1629 * @prefix: the namespace prefix or NULL
1630 * @URL: the namespace name
1631 *
1632 * Pushes a new parser namespace on top of the ns stack
1633 *
1634 * Returns -1 in case of error, -2 if the namespace should be discarded
1635 * and the index in the stack otherwise.
1636 */
1637 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1638 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1639 {
1640 if (ctxt->options & XML_PARSE_NSCLEAN) {
1641 int i;
1642 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1643 if (ctxt->nsTab[i] == prefix) {
1644 /* in scope */
1645 if (ctxt->nsTab[i + 1] == URL)
1646 return(-2);
1647 /* out of scope keep it */
1648 break;
1649 }
1650 }
1651 }
1652 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1653 ctxt->nsMax = 10;
1654 ctxt->nsNr = 0;
1655 ctxt->nsTab = (const xmlChar **)
1656 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1657 if (ctxt->nsTab == NULL) {
1658 xmlErrMemory(ctxt, NULL);
1659 ctxt->nsMax = 0;
1660 return (-1);
1661 }
1662 } else if (ctxt->nsNr >= ctxt->nsMax) {
1663 const xmlChar ** tmp;
1664 ctxt->nsMax *= 2;
1665 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1666 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1667 if (tmp == NULL) {
1668 xmlErrMemory(ctxt, NULL);
1669 ctxt->nsMax /= 2;
1670 return (-1);
1671 }
1672 ctxt->nsTab = tmp;
1673 }
1674 ctxt->nsTab[ctxt->nsNr++] = prefix;
1675 ctxt->nsTab[ctxt->nsNr++] = URL;
1676 return (ctxt->nsNr);
1677 }
1678 /**
1679 * nsPop:
1680 * @ctxt: an XML parser context
1681 * @nr: the number to pop
1682 *
1683 * Pops the top @nr parser prefix/namespace from the ns stack
1684 *
1685 * Returns the number of namespaces removed
1686 */
1687 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1688 nsPop(xmlParserCtxtPtr ctxt, int nr)
1689 {
1690 int i;
1691
1692 if (ctxt->nsTab == NULL) return(0);
1693 if (ctxt->nsNr < nr) {
1694 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1695 nr = ctxt->nsNr;
1696 }
1697 if (ctxt->nsNr <= 0)
1698 return (0);
1699
1700 for (i = 0;i < nr;i++) {
1701 ctxt->nsNr--;
1702 ctxt->nsTab[ctxt->nsNr] = NULL;
1703 }
1704 return(nr);
1705 }
1706 #endif
1707
1708 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1709 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1710 const xmlChar **atts;
1711 int *attallocs;
1712 int maxatts;
1713
1714 if (ctxt->atts == NULL) {
1715 maxatts = 55; /* allow for 10 attrs by default */
1716 atts = (const xmlChar **)
1717 xmlMalloc(maxatts * sizeof(xmlChar *));
1718 if (atts == NULL) goto mem_error;
1719 ctxt->atts = atts;
1720 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1721 if (attallocs == NULL) goto mem_error;
1722 ctxt->attallocs = attallocs;
1723 ctxt->maxatts = maxatts;
1724 } else if (nr + 5 > ctxt->maxatts) {
1725 maxatts = (nr + 5) * 2;
1726 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1727 maxatts * sizeof(const xmlChar *));
1728 if (atts == NULL) goto mem_error;
1729 ctxt->atts = atts;
1730 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1731 (maxatts / 5) * sizeof(int));
1732 if (attallocs == NULL) goto mem_error;
1733 ctxt->attallocs = attallocs;
1734 ctxt->maxatts = maxatts;
1735 }
1736 return(ctxt->maxatts);
1737 mem_error:
1738 xmlErrMemory(ctxt, NULL);
1739 return(-1);
1740 }
1741
1742 /**
1743 * inputPush:
1744 * @ctxt: an XML parser context
1745 * @value: the parser input
1746 *
1747 * Pushes a new parser input on top of the input stack
1748 *
1749 * Returns -1 in case of error, the index in the stack otherwise
1750 */
1751 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1752 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1753 {
1754 if ((ctxt == NULL) || (value == NULL))
1755 return(-1);
1756 if (ctxt->inputNr >= ctxt->inputMax) {
1757 ctxt->inputMax *= 2;
1758 ctxt->inputTab =
1759 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1760 ctxt->inputMax *
1761 sizeof(ctxt->inputTab[0]));
1762 if (ctxt->inputTab == NULL) {
1763 xmlErrMemory(ctxt, NULL);
1764 xmlFreeInputStream(value);
1765 ctxt->inputMax /= 2;
1766 value = NULL;
1767 return (-1);
1768 }
1769 }
1770 ctxt->inputTab[ctxt->inputNr] = value;
1771 ctxt->input = value;
1772 return (ctxt->inputNr++);
1773 }
1774 /**
1775 * inputPop:
1776 * @ctxt: an XML parser context
1777 *
1778 * Pops the top parser input from the input stack
1779 *
1780 * Returns the input just removed
1781 */
1782 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1783 inputPop(xmlParserCtxtPtr ctxt)
1784 {
1785 xmlParserInputPtr ret;
1786
1787 if (ctxt == NULL)
1788 return(NULL);
1789 if (ctxt->inputNr <= 0)
1790 return (NULL);
1791 ctxt->inputNr--;
1792 if (ctxt->inputNr > 0)
1793 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1794 else
1795 ctxt->input = NULL;
1796 ret = ctxt->inputTab[ctxt->inputNr];
1797 ctxt->inputTab[ctxt->inputNr] = NULL;
1798 return (ret);
1799 }
1800 /**
1801 * nodePush:
1802 * @ctxt: an XML parser context
1803 * @value: the element node
1804 *
1805 * Pushes a new element node on top of the node stack
1806 *
1807 * Returns -1 in case of error, the index in the stack otherwise
1808 */
1809 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1810 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1811 {
1812 if (ctxt == NULL) return(0);
1813 if (ctxt->nodeNr >= ctxt->nodeMax) {
1814 xmlNodePtr *tmp;
1815
1816 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1817 ctxt->nodeMax * 2 *
1818 sizeof(ctxt->nodeTab[0]));
1819 if (tmp == NULL) {
1820 xmlErrMemory(ctxt, NULL);
1821 return (-1);
1822 }
1823 ctxt->nodeTab = tmp;
1824 ctxt->nodeMax *= 2;
1825 }
1826 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1827 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1828 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1829 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1830 xmlParserMaxDepth);
1831 xmlHaltParser(ctxt);
1832 return(-1);
1833 }
1834 ctxt->nodeTab[ctxt->nodeNr] = value;
1835 ctxt->node = value;
1836 return (ctxt->nodeNr++);
1837 }
1838
1839 /**
1840 * nodePop:
1841 * @ctxt: an XML parser context
1842 *
1843 * Pops the top element node from the node stack
1844 *
1845 * Returns the node just removed
1846 */
1847 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1848 nodePop(xmlParserCtxtPtr ctxt)
1849 {
1850 xmlNodePtr ret;
1851
1852 if (ctxt == NULL) return(NULL);
1853 if (ctxt->nodeNr <= 0)
1854 return (NULL);
1855 ctxt->nodeNr--;
1856 if (ctxt->nodeNr > 0)
1857 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1858 else
1859 ctxt->node = NULL;
1860 ret = ctxt->nodeTab[ctxt->nodeNr];
1861 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1862 return (ret);
1863 }
1864
1865 /**
1866 * nameNsPush:
1867 * @ctxt: an XML parser context
1868 * @value: the element name
1869 * @prefix: the element prefix
1870 * @URI: the element namespace name
1871 * @line: the current line number for error messages
1872 * @nsNr: the number of namespaces pushed on the namespace table
1873 *
1874 * Pushes a new element name/prefix/URL on top of the name stack
1875 *
1876 * Returns -1 in case of error, the index in the stack otherwise
1877 */
1878 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1879 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1880 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1881 {
1882 xmlStartTag *tag;
1883
1884 if (ctxt->nameNr >= ctxt->nameMax) {
1885 const xmlChar * *tmp;
1886 xmlStartTag *tmp2;
1887 ctxt->nameMax *= 2;
1888 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1889 ctxt->nameMax *
1890 sizeof(ctxt->nameTab[0]));
1891 if (tmp == NULL) {
1892 ctxt->nameMax /= 2;
1893 goto mem_error;
1894 }
1895 ctxt->nameTab = tmp;
1896 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1897 ctxt->nameMax *
1898 sizeof(ctxt->pushTab[0]));
1899 if (tmp2 == NULL) {
1900 ctxt->nameMax /= 2;
1901 goto mem_error;
1902 }
1903 ctxt->pushTab = tmp2;
1904 } else if (ctxt->pushTab == NULL) {
1905 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1906 sizeof(ctxt->pushTab[0]));
1907 if (ctxt->pushTab == NULL)
1908 goto mem_error;
1909 }
1910 ctxt->nameTab[ctxt->nameNr] = value;
1911 ctxt->name = value;
1912 tag = &ctxt->pushTab[ctxt->nameNr];
1913 tag->prefix = prefix;
1914 tag->URI = URI;
1915 tag->line = line;
1916 tag->nsNr = nsNr;
1917 return (ctxt->nameNr++);
1918 mem_error:
1919 xmlErrMemory(ctxt, NULL);
1920 return (-1);
1921 }
1922 #ifdef LIBXML_PUSH_ENABLED
1923 /**
1924 * nameNsPop:
1925 * @ctxt: an XML parser context
1926 *
1927 * Pops the top element/prefix/URI name from the name stack
1928 *
1929 * Returns the name just removed
1930 */
1931 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1932 nameNsPop(xmlParserCtxtPtr ctxt)
1933 {
1934 const xmlChar *ret;
1935
1936 if (ctxt->nameNr <= 0)
1937 return (NULL);
1938 ctxt->nameNr--;
1939 if (ctxt->nameNr > 0)
1940 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1941 else
1942 ctxt->name = NULL;
1943 ret = ctxt->nameTab[ctxt->nameNr];
1944 ctxt->nameTab[ctxt->nameNr] = NULL;
1945 return (ret);
1946 }
1947 #endif /* LIBXML_PUSH_ENABLED */
1948
1949 /**
1950 * namePush:
1951 * @ctxt: an XML parser context
1952 * @value: the element name
1953 *
1954 * Pushes a new element name on top of the name stack
1955 *
1956 * Returns -1 in case of error, the index in the stack otherwise
1957 */
1958 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1959 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1960 {
1961 if (ctxt == NULL) return (-1);
1962
1963 if (ctxt->nameNr >= ctxt->nameMax) {
1964 const xmlChar * *tmp;
1965 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1966 ctxt->nameMax * 2 *
1967 sizeof(ctxt->nameTab[0]));
1968 if (tmp == NULL) {
1969 goto mem_error;
1970 }
1971 ctxt->nameTab = tmp;
1972 ctxt->nameMax *= 2;
1973 }
1974 ctxt->nameTab[ctxt->nameNr] = value;
1975 ctxt->name = value;
1976 return (ctxt->nameNr++);
1977 mem_error:
1978 xmlErrMemory(ctxt, NULL);
1979 return (-1);
1980 }
1981 /**
1982 * namePop:
1983 * @ctxt: an XML parser context
1984 *
1985 * Pops the top element name from the name stack
1986 *
1987 * Returns the name just removed
1988 */
1989 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1990 namePop(xmlParserCtxtPtr ctxt)
1991 {
1992 const xmlChar *ret;
1993
1994 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1995 return (NULL);
1996 ctxt->nameNr--;
1997 if (ctxt->nameNr > 0)
1998 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1999 else
2000 ctxt->name = NULL;
2001 ret = ctxt->nameTab[ctxt->nameNr];
2002 ctxt->nameTab[ctxt->nameNr] = NULL;
2003 return (ret);
2004 }
2005
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top entry, doubling the table when it is full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* spaceMax is doubled up front and rolled back on failure */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2024
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is pointed at
 * the entry below it, or at the first entry of the table when the stack
 * becomes empty. The vacated entry is reset to -1.
 *
 * Returns the value just removed, 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2037
2038 /*
2039 * Macros for accessing the content. Those should be used only by the parser,
2040 * and not exported.
2041 *
2042 * Dirty macros, i.e. one often need to make assumption on the context to
2043 * use them
2044 *
2045 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2046 * To be used with extreme caution since operations consuming
2047 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2060 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2061 *
2062 * NEXT Skip to the next character, this does the proper decoding
2063 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2064 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2065 * CUR_CHAR(l) returns the current unicode character (int), set l
2066 * to the number of xmlChars used for the encoding [0-5].
2067 * CUR_SCHAR same but operate on a string instead of the context
2068 * COPY_BUF copy the current unicode char to the target buffer, increment
2069 * the index
2070 * GROW, SHRINK handling of input buffers
2071 */
2072
/*
 * Accessors on the current input of a parser context; all of them expect
 * a variable named ctxt to be in scope at the point of use.
 * RAW and CUR expand to the same expression, the byte at the current
 * position. NXT(val) is the byte val positions further, CUR_PTR the
 * current position itself and BASE_PTR the input base pointer.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s, read as
 * unsigned char, are c1 ... cn. Each CMPn is built on CMP(n-1), so the
 * bytes are compared left to right and the test stops at the first
 * mismatch. The argument s is pasted without parentheses: pass a plain
 * pointer expression.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )
2096
/*
 * SKIP(val): advance the current position and the column counter by val
 * without looking at the skipped bytes, then ask for more input if the
 * new current byte is 0.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance val bytes one at a time, bumping the line counter
 * and resetting the column to 1 on each '\n' and bumping the column
 * otherwise, then ask for more input if the new current byte is 0.
 * val is pasted unparenthesized into the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SHRINK: when the parser is not progressive, more than 2 * INPUT_CHUNK
 * bytes lie before the current position and fewer than 2 * INPUT_CHUNK
 * remain after it, call xmlSHRINK().
 * The macro expands to a bare if statement which already ends with a
 * semicolon, so it is not safe as the unbraced body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2119
/**
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Shrink the current input with xmlParserInputShrink(), then ask for
 * INPUT_CHUNK more bytes if the byte at the current position is 0.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    /* data still available at the current position: nothing to read */
    if (ctxt->input->cur[0] != 0)
        return;
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2125
/*
 * GROW: when the parser is not progressive and fewer than INPUT_CHUNK
 * bytes remain after the current position, call xmlGROW().
 * The macro expands to a bare if statement which already ends with a
 * semicolon, so it is not safe as the unbraced body of an if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
2129
/**
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Ask for INPUT_CHUNK more bytes on the current input.
 * Unless XML_PARSE_HUGE is set, an input with a buffer whose read
 * callback is not xmlInputReadCallbackNop and whose current position is
 * more than XML_MAX_LOOKUP_LIMIT bytes away from either the base or the
 * end raises a fatal error and halts the parser instead. The parser is
 * also halted if the current position ends up outside of the
 * [base, end] range after growing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t ahead = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t behind = ctxt->input->cur - ctxt->input->base;

    if ((ahead > XML_MAX_LOOKUP_LIMIT) || (behind > XML_MAX_LOOKUP_LIMIT)) {
        if ((ctxt->input->buf) &&
            (ctxt->input->buf->readcallback != xmlInputReadCallbackNop) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
            xmlHaltParser(ctxt);
            return;
        }
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    /* sanity check of the buffer pointers after the refill */
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    /* still nothing at the current position: try once more */
    if (ctxt->input->cur == NULL)
        return;
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2153
/*
 * SKIP_BLANKS and NEXT are shorthands for xmlSkipBlankChars() and
 * xmlNextChar() applied to the variable named ctxt in the enclosing scope.
 */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: move one byte forward in the current input, bumping the column
 * counter (the line counter is not updated), then ask the input to grow
 * when the byte under the cursor is 0.
 * The expansion is a plain brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
2164
/*
 * NEXTL(l): account for the character under the cursor (a '\n' bumps the
 * line counter and resets the column to 1, anything else bumps the column
 * by one), then advance the cursor by @l bytes.
 * @l is parenthesized so that an arbitrary expression can be passed.
 * No attempt is made to grow the input here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)
2171
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): shorthands for xmlCurrentChar() on the
 * parser input and xmlStringCurrentChar() on the string @s; @l must be an
 * int lvalue, its address is handed to the callee.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character @v to buffer @b at index @i.
 * When @l is 1 the value is stored as a single xmlChar and @i is
 * incremented; otherwise xmlCopyCharMultiByte() writes it and @i is
 * advanced by the value it returns.
 * All arguments are parenthesized so that expressions can be passed; @i
 * must be a modifiable lvalue. The expansion is an if/else statement
 * without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2178
2179 /**
2180 * xmlSkipBlankChars:
2181 * @ctxt: the XML parser context
2182 *
2183 * skip all blanks character found at that point in the input streams.
2184 * It pops up finished entities in the process if allowable at that point.
2185 *
2186 * Returns the number of space chars skipped
2187 */
2188
2189 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2190 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191 int res = 0;
2192
2193 /*
2194 * It's Okay to use CUR/NEXT here since all the blanks are on
2195 * the ASCII range.
2196 */
2197 if (ctxt->instate != XML_PARSER_DTD) {
2198 const xmlChar *cur;
2199 /*
2200 * if we are in the document content, go really fast
2201 */
2202 cur = ctxt->input->cur;
2203 while (IS_BLANK_CH(*cur)) {
2204 if (*cur == '\n') {
2205 ctxt->input->line++; ctxt->input->col = 1;
2206 } else {
2207 ctxt->input->col++;
2208 }
2209 cur++;
2210 res++;
2211 if (*cur == 0) {
2212 ctxt->input->cur = cur;
2213 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2214 cur = ctxt->input->cur;
2215 }
2216 }
2217 ctxt->input->cur = cur;
2218 } else {
2219 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2220
2221 while (1) {
2222 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2223 NEXT;
2224 } else if (CUR == '%') {
2225 /*
2226 * Need to handle support of entities branching here
2227 */
2228 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2229 break;
2230 xmlParsePEReference(ctxt);
2231 } else if (CUR == 0) {
2232 if (ctxt->inputNr <= 1)
2233 break;
2234 xmlPopInput(ctxt);
2235 } else {
2236 break;
2237 }
2238
2239 /*
2240 * Also increase the counter when entering or exiting a PERef.
2241 * The spec says: "When a parameter-entity reference is recognized
2242 * in the DTD and included, its replacement text MUST be enlarged
2243 * by the attachment of one leading and one following space (#x20)
2244 * character."
2245 */
2246 res++;
2247 }
2248 }
2249 return(res);
2250 }
2251
2252 /************************************************************************
2253 * *
2254 * Commodity functions to handle entities *
2255 * *
2256 ************************************************************************/
2257
2258 /**
2259 * xmlPopInput:
2260 * @ctxt: an XML parser context
2261 *
2262 * xmlPopInput: the current input pointed by ctxt->input came to an end
2263 * pop it and return the next char.
2264 *
2265 * Returns the current xmlChar in the parser context
2266 */
2267 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2268 xmlPopInput(xmlParserCtxtPtr ctxt) {
2269 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2270 if (xmlParserDebugEntities)
2271 xmlGenericError(xmlGenericErrorContext,
2272 "Popping input %d\n", ctxt->inputNr);
2273 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2274 (ctxt->instate != XML_PARSER_EOF))
2275 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2276 "Unfinished entity outside the DTD");
2277 xmlFreeInputStream(inputPop(ctxt));
2278 if (*ctxt->input->cur == 0)
2279 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2280 return(CUR);
2281 }
2282
2283 /**
2284 * xmlPushInput:
2285 * @ctxt: an XML parser context
2286 * @input: an XML parser input fragment (entity, XML fragment ...).
2287 *
2288 * xmlPushInput: switch to a new input stream which is stacked on top
2289 * of the previous one(s).
2290 * Returns -1 in case of error or the index in the input stack
2291 */
2292 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2293 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2294 int ret;
2295 if (input == NULL) return(-1);
2296
2297 if (xmlParserDebugEntities) {
2298 if ((ctxt->input != NULL) && (ctxt->input->filename))
2299 xmlGenericError(xmlGenericErrorContext,
2300 "%s(%d): ", ctxt->input->filename,
2301 ctxt->input->line);
2302 xmlGenericError(xmlGenericErrorContext,
2303 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2304 }
2305 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2306 (ctxt->inputNr > 1024)) {
2307 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2308 while (ctxt->inputNr > 1)
2309 xmlFreeInputStream(inputPop(ctxt));
2310 return(-1);
2311 }
2312 ret = inputPush(ctxt, input);
2313 if (ctxt->instate == XML_PARSER_EOF)
2314 return(-1);
2315 GROW;
2316 return(ret);
2317 }
2318
2319 /**
2320 * xmlParseCharRef:
2321 * @ctxt: an XML parser context
2322 *
2323 * parse Reference declarations
2324 *
2325 * [66] CharRef ::= '&#' [0-9]+ ';' |
2326 * '&#x' [0-9a-fA-F]+ ';'
2327 *
2328 * [ WFC: Legal Character ]
2329 * Characters referred to using character references must match the
2330 * production for Char.
2331 *
2332 * Returns the value parsed (as an int), 0 in case of error
2333 */
2334 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2335 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2336 int val = 0;
2337 int count = 0;
2338
2339 /*
2340 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2341 */
2342 if ((RAW == '&') && (NXT(1) == '#') &&
2343 (NXT(2) == 'x')) {
2344 SKIP(3);
2345 GROW;
2346 while (RAW != ';') { /* loop blocked by count */
2347 if (count++ > 20) {
2348 count = 0;
2349 GROW;
2350 if (ctxt->instate == XML_PARSER_EOF)
2351 return(0);
2352 }
2353 if ((RAW >= '0') && (RAW <= '9'))
2354 val = val * 16 + (CUR - '0');
2355 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2356 val = val * 16 + (CUR - 'a') + 10;
2357 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2358 val = val * 16 + (CUR - 'A') + 10;
2359 else {
2360 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2361 val = 0;
2362 break;
2363 }
2364 if (val > 0x110000)
2365 val = 0x110000;
2366
2367 NEXT;
2368 count++;
2369 }
2370 if (RAW == ';') {
2371 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2372 ctxt->input->col++;
2373 ctxt->input->cur++;
2374 }
2375 } else if ((RAW == '&') && (NXT(1) == '#')) {
2376 SKIP(2);
2377 GROW;
2378 while (RAW != ';') { /* loop blocked by count */
2379 if (count++ > 20) {
2380 count = 0;
2381 GROW;
2382 if (ctxt->instate == XML_PARSER_EOF)
2383 return(0);
2384 }
2385 if ((RAW >= '0') && (RAW <= '9'))
2386 val = val * 10 + (CUR - '0');
2387 else {
2388 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2389 val = 0;
2390 break;
2391 }
2392 if (val > 0x110000)
2393 val = 0x110000;
2394
2395 NEXT;
2396 count++;
2397 }
2398 if (RAW == ';') {
2399 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2400 ctxt->input->col++;
2401 ctxt->input->cur++;
2402 }
2403 } else {
2404 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2405 }
2406
2407 /*
2408 * [ WFC: Legal Character ]
2409 * Characters referred to using character references must match the
2410 * production for Char.
2411 */
2412 if (val >= 0x110000) {
2413 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2414 "xmlParseCharRef: character reference out of bounds\n",
2415 val);
2416 } else if (IS_CHAR(val)) {
2417 return(val);
2418 } else {
2419 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2420 "xmlParseCharRef: invalid xmlChar value %d\n",
2421 val);
2422 }
2423 return(0);
2424 }
2425
2426 /**
2427 * xmlParseStringCharRef:
2428 * @ctxt: an XML parser context
2429 * @str: a pointer to an index in the string
2430 *
2431 * parse Reference declarations, variant parsing from a string rather
2432 * than an an input flow.
2433 *
2434 * [66] CharRef ::= '&#' [0-9]+ ';' |
2435 * '&#x' [0-9a-fA-F]+ ';'
2436 *
2437 * [ WFC: Legal Character ]
2438 * Characters referred to using character references must match the
2439 * production for Char.
2440 *
2441 * Returns the value parsed (as an int), 0 in case of error, str will be
2442 * updated to the current value of the index
2443 */
2444 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2445 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2446 const xmlChar *ptr;
2447 xmlChar cur;
2448 int val = 0;
2449
2450 if ((str == NULL) || (*str == NULL)) return(0);
2451 ptr = *str;
2452 cur = *ptr;
2453 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2454 ptr += 3;
2455 cur = *ptr;
2456 while (cur != ';') { /* Non input consuming loop */
2457 if ((cur >= '0') && (cur <= '9'))
2458 val = val * 16 + (cur - '0');
2459 else if ((cur >= 'a') && (cur <= 'f'))
2460 val = val * 16 + (cur - 'a') + 10;
2461 else if ((cur >= 'A') && (cur <= 'F'))
2462 val = val * 16 + (cur - 'A') + 10;
2463 else {
2464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2465 val = 0;
2466 break;
2467 }
2468 if (val > 0x110000)
2469 val = 0x110000;
2470
2471 ptr++;
2472 cur = *ptr;
2473 }
2474 if (cur == ';')
2475 ptr++;
2476 } else if ((cur == '&') && (ptr[1] == '#')){
2477 ptr += 2;
2478 cur = *ptr;
2479 while (cur != ';') { /* Non input consuming loops */
2480 if ((cur >= '0') && (cur <= '9'))
2481 val = val * 10 + (cur - '0');
2482 else {
2483 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2484 val = 0;
2485 break;
2486 }
2487 if (val > 0x110000)
2488 val = 0x110000;
2489
2490 ptr++;
2491 cur = *ptr;
2492 }
2493 if (cur == ';')
2494 ptr++;
2495 } else {
2496 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2497 return(0);
2498 }
2499 *str = ptr;
2500
2501 /*
2502 * [ WFC: Legal Character ]
2503 * Characters referred to using character references must match the
2504 * production for Char.
2505 */
2506 if (val >= 0x110000) {
2507 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2508 "xmlParseStringCharRef: character reference out of bounds\n",
2509 val);
2510 } else if (IS_CHAR(val)) {
2511 return(val);
2512 } else {
2513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2514 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2515 val);
2516 }
2517 return(0);
2518 }
2519
2520 /**
2521 * xmlParserHandlePEReference:
2522 * @ctxt: the parser context
2523 *
2524 * [69] PEReference ::= '%' Name ';'
2525 *
2526 * [ WFC: No Recursion ]
2527 * A parsed entity must not contain a recursive
2528 * reference to itself, either directly or indirectly.
2529 *
2530 * [ WFC: Entity Declared ]
2531 * In a document without any DTD, a document with only an internal DTD
2532 * subset which contains no parameter entity references, or a document
2533 * with "standalone='yes'", ... ... The declaration of a parameter
2534 * entity must precede any reference to it...
2535 *
2536 * [ VC: Entity Declared ]
2537 * In a document with an external subset or external parameter entities
2538 * with "standalone='no'", ... ... The declaration of a parameter entity
2539 * must precede any reference to it...
2540 *
2541 * [ WFC: In DTD ]
2542 * Parameter-entity references may only appear in the DTD.
2543 * NOTE: misleading but this is handled.
2544 *
2545 * A PEReference may have been detected in the current input stream
2546 * the handling is done accordingly to
2547 * http://www.w3.org/TR/REC-xml#entproc
2548 * i.e.
2549 * - Included in literal in entity values
2550 * - Included as Parameter Entity reference within DTDs
2551 */
2552 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2553 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2554 switch(ctxt->instate) {
2555 case XML_PARSER_CDATA_SECTION:
2556 return;
2557 case XML_PARSER_COMMENT:
2558 return;
2559 case XML_PARSER_START_TAG:
2560 return;
2561 case XML_PARSER_END_TAG:
2562 return;
2563 case XML_PARSER_EOF:
2564 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2565 return;
2566 case XML_PARSER_PROLOG:
2567 case XML_PARSER_START:
2568 case XML_PARSER_MISC:
2569 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2570 return;
2571 case XML_PARSER_ENTITY_DECL:
2572 case XML_PARSER_CONTENT:
2573 case XML_PARSER_ATTRIBUTE_VALUE:
2574 case XML_PARSER_PI:
2575 case XML_PARSER_SYSTEM_LITERAL:
2576 case XML_PARSER_PUBLIC_LITERAL:
2577 /* we just ignore it there */
2578 return;
2579 case XML_PARSER_EPILOG:
2580 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2581 return;
2582 case XML_PARSER_ENTITY_VALUE:
2583 /*
2584 * NOTE: in the case of entity values, we don't do the
2585 * substitution here since we need the literal
2586 * entity value to be able to save the internal
2587 * subset of the document.
2588 * This will be handled by xmlStringDecodeEntities
2589 */
2590 return;
2591 case XML_PARSER_DTD:
2592 /*
2593 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2594 * In the internal DTD subset, parameter-entity references
2595 * can occur only where markup declarations can occur, not
2596 * within markup declarations.
2597 * In that case this is handled in xmlParseMarkupDecl
2598 */
2599 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2600 return;
2601 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2602 return;
2603 break;
2604 case XML_PARSER_IGNORE:
2605 return;
2606 }
2607
2608 xmlParsePEReference(ctxt);
2609 }
2610
/*
 * Macro used to grow the current buffer: the new size is twice the
 * current one plus @n.
 * buffer##_size is expected to be a size_t
 * mem_error: is a label expected to handle memory allocation failures; it
 * is jumped to when the new size wraps around or when the reallocation
 * fails, in which case @buffer is left untouched.
 * @n is parenthesized so that an arbitrary expression can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2625
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. Character references are always expanded; entity
 * references and parameter-entity references are expanded only when the
 * matching flag is set in @what. Nested content is decoded recursively,
 * the recursion depth being capped at 40 (1024 with XML_PARSE_HUGE).
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments, on
 *      allocation failure and on decoding errors.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    /* guard against runaway recursion through nested entities */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 1024)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     * After every append the buffer is grown as soon as fewer than
     * XML_PARSER_BUFFER_SIZE bytes of headroom are left.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
	   (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /* character reference: expanded whatever @what says */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    COPY_BUF(0,buffer,nbchars,val);
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: only its first content byte is copied */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* decode the entity content recursively, one level deeper */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL) {
                    /* the entity content is emptied when it fails to decode */
                    ent->content[0] = 0;
                    goto int_error;
                }

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/* no content available: copy the reference back as &name; */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEReferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		/*
		 * When the content is still NULL here, the call below
		 * returns NULL and the whole decoding is abandoned.
		 */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL) {
                    /* the entity content is emptied when it fails to decode */
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* plain character: copy it and move on by its byte length */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* mem_error falls through to int_error to release the buffers */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2822
2823 /**
2824 * xmlStringDecodeEntities:
2825 * @ctxt: the parser context
2826 * @str: the input string
2827 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2828 * @end: an end marker xmlChar, 0 if none
2829 * @end2: an end marker xmlChar, 0 if none
2830 * @end3: an end marker xmlChar, 0 if none
2831 *
2832 * Takes a entity string content and process to do the adequate substitutions.
2833 *
2834 * [67] Reference ::= EntityRef | CharRef
2835 *
2836 * [69] PEReference ::= '%' Name ';'
2837 *
2838 * Returns A newly allocated string with the substitution done. The caller
2839 * must deallocate it !
2840 */
2841 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2842 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2843 xmlChar end, xmlChar end2, xmlChar end3) {
2844 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2845 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2846 end, end2, end3));
2847 }
2848
2849 /************************************************************************
2850 * *
2851 * Commodity functions, cleanup needed ? *
2852 * *
2853 ************************************************************************/
2854
2855 /**
2856 * areBlanks:
2857 * @ctxt: an XML parser context
2858 * @str: a xmlChar *
2859 * @len: the size of @str
2860 * @blank_chars: we know the chars are blanks
2861 *
2862 * Is this a sequence of blank chars that one can ignore ?
2863 *
2864 * Returns 1 if ignorable 0 otherwise.
2865 */
2866
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2867 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2868 int blank_chars) {
2869 int i, ret;
2870 xmlNodePtr lastChild;
2871
2872 /*
2873 * Don't spend time trying to differentiate them, the same callback is
2874 * used !
2875 */
2876 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2877 return(0);
2878
2879 /*
2880 * Check for xml:space value.
2881 */
2882 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2883 (*(ctxt->space) == -2))
2884 return(0);
2885
2886 /*
2887 * Check that the string is made of blanks
2888 */
2889 if (blank_chars == 0) {
2890 for (i = 0;i < len;i++)
2891 if (!(IS_BLANK_CH(str[i]))) return(0);
2892 }
2893
2894 /*
2895 * Look if the element is mixed content in the DTD if available
2896 */
2897 if (ctxt->node == NULL) return(0);
2898 if (ctxt->myDoc != NULL) {
2899 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2900 if (ret == 0) return(1);
2901 if (ret == 1) return(0);
2902 }
2903
2904 /*
2905 * Otherwise, heuristic :-\
2906 */
2907 if ((RAW != '<') && (RAW != 0xD)) return(0);
2908 if ((ctxt->node->children == NULL) &&
2909 (RAW == '<') && (NXT(1) == '/')) return(0);
2910
2911 lastChild = xmlGetLastChild(ctxt->node);
2912 if (lastChild == NULL) {
2913 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2914 (ctxt->node->content != NULL)) return(0);
2915 } else if (xmlNodeIsText(lastChild))
2916 return(0);
2917 else if ((ctxt->node->children != NULL) &&
2918 (xmlNodeIsText(ctxt->node->children)))
2919 return(0);
2920 return(1);
2921 }
2922
2923 /************************************************************************
2924 * *
2925 * Extra stuff for namespace support *
2926 * Relates to http://www.w3.org/TR/WD-xml-names *
2927 * *
2928 ************************************************************************/
2929
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the UTF8 encoded qualified name string to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The name is returned whole, with no prefix, when it starts with "xml:"
 * (unless XML_XML_NAMESPACE is defined), when it starts with ':' or when
 * nothing follows its first ':'.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. Both are newly allocated strings. NULL is
 *   returned when @name or @prefix is NULL and on allocation failure.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
	return(xmlStrdup(name));
#endif

    /* nasty but well-formed */
    if (cur[0] == ':')
	return(xmlStrdup(name));

    /* first pass: collect bytes up to the first ':' in the stack buffer */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
	buf[len++] = c;
	c = *cur++;
    }
    if (len >= max) {
	/*
	 * Okay someone managed to make a huge name, so he's ready to pay
	 * for the processing speed.
	 * Switch to a heap buffer which is doubled whenever fewer than 10
	 * bytes are left in it.
	 */
	max = len * 2;

	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	if (buffer == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	memcpy(buffer, buf, len);
	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
	    if (len + 10 > max) {
	        xmlChar *tmp;

		max *= 2;
		tmp = (xmlChar *) xmlRealloc(buffer,
						max * sizeof(xmlChar));
		if (tmp == NULL) {
		    xmlFree(buffer);
		    xmlErrMemory(ctxt, NULL);
		    return(NULL);
		}
		buffer = tmp;
	    }
	    buffer[len++] = c;
	    c = *cur++;
	}
	buffer[len] = 0;
    }

    /* "prefix:" with an empty local part: hand back the whole name */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
	    xmlFree(buffer);
	*prefix = NULL;
	return(xmlStrdup(name));
    }

    if (buffer == NULL)
	ret = xmlStrndup(buf, len);
    else {
	/* ownership of the heap buffer moves to ret */
	ret = buffer;
	buffer = NULL;
	max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
	/* what was collected so far is the prefix, now get the local part */
	c = *cur;
        *prefix = ret;
	/* not expected to trigger: an empty local part was handled above */
	if (c == 0) {
	    return(xmlStrndup(BAD_CAST "", 0));
	}
	len = 0;

	/*
	 * Check that the first character is proper to start
	 * a new name
	 * An error is reported otherwise, but the split still goes on.
	 */
	if (!(((c >= 0x61) && (c <= 0x7A)) ||
	      ((c >= 0x41) && (c <= 0x5A)) ||
	      (c == '_') || (c == ':'))) {
	    int l;
	    int first = CUR_SCHAR(cur, l);

	    if (!IS_LETTER(first) && (first != '_')) {
		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
			    "Name %s is not XML Namespace compliant\n",
				  name);
	    }
	}
	cur++;

	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
	    buf[len++] = c;
	    c = *cur++;
	}
	if (len >= max) {
	    /*
	     * Okay someone managed to make a huge name, so he's ready to pay
	     * for the processing speed.
	     * NOTE(review): on the allocation failures below NULL is
	     * returned while *prefix still holds the allocated prefix -
	     * confirm that callers free it.
	     */
	    max = len * 2;

	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	    if (buffer == NULL) {
	        xmlErrMemory(ctxt, NULL);
		return(NULL);
	    }
	    memcpy(buffer, buf, len);
	    while (c != 0) { /* tested bigname2.xml */
		if (len + 10 > max) {
		    xmlChar *tmp;

		    max *= 2;
		    tmp = (xmlChar *) xmlRealloc(buffer,
						    max * sizeof(xmlChar));
		    if (tmp == NULL) {
			xmlErrMemory(ctxt, NULL);
			xmlFree(buffer);
			return(NULL);
		    }
		    buffer = tmp;
		}
		buffer[len++] = c;
		c = *cur++;
	    }
	    buffer[len] = 0;
	}

	if (buffer == NULL)
	    ret = xmlStrndup(buf, len);
	else {
	    ret = buffer;
	}
    }

    return(ret);
}
3100
3101 /************************************************************************
3102 * *
3103 * The parser itself *
3104 * Relates to http://www.w3.org/TR/REC-xml *
3105 * *
3106 ************************************************************************/
3107
3108 /************************************************************************
3109 * *
3110 * Routines to parse Name, NCName and NmToken *
3111 * *
3112 ************************************************************************/
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3121
3122 /*
3123 * The two following functions are related to the change of accepted
3124 * characters for Name and NmToken in the Revision 5 of XML-1.0
3125 * They correspond to the modified production [4] and the new production [4a]
3126 * changes in that revision. Also note that the macros used for the
3127 * productions Letter, Digit, CombiningChar and Extender are not needed
3128 * anymore.
3129 * We still keep compatibility to pre-revision5 parsing semantic if the
3130 * new XML_PARSE_OLD10 option is given to the parser.
3131 */
3132 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3133 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3134 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3135 /*
3136 * Use the new checks of production [4] [4a] amd [5] of the
3137 * Update 5 of XML-1.0
3138 */
3139 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3140 (((c >= 'a') && (c <= 'z')) ||
3141 ((c >= 'A') && (c <= 'Z')) ||
3142 (c == '_') || (c == ':') ||
3143 ((c >= 0xC0) && (c <= 0xD6)) ||
3144 ((c >= 0xD8) && (c <= 0xF6)) ||
3145 ((c >= 0xF8) && (c <= 0x2FF)) ||
3146 ((c >= 0x370) && (c <= 0x37D)) ||
3147 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3148 ((c >= 0x200C) && (c <= 0x200D)) ||
3149 ((c >= 0x2070) && (c <= 0x218F)) ||
3150 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3151 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3152 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3153 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3154 ((c >= 0x10000) && (c <= 0xEFFFF))))
3155 return(1);
3156 } else {
3157 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3158 return(1);
3159 }
3160 return(0);
3161 }
3162
3163 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3164 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166 /*
3167 * Use the new checks of production [4] [4a] amd [5] of the
3168 * Update 5 of XML-1.0
3169 */
3170 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3171 (((c >= 'a') && (c <= 'z')) ||
3172 ((c >= 'A') && (c <= 'Z')) ||
3173 ((c >= '0') && (c <= '9')) || /* !start */
3174 (c == '_') || (c == ':') ||
3175 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3176 ((c >= 0xC0) && (c <= 0xD6)) ||
3177 ((c >= 0xD8) && (c <= 0xF6)) ||
3178 ((c >= 0xF8) && (c <= 0x2FF)) ||
3179 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3180 ((c >= 0x370) && (c <= 0x37D)) ||
3181 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3182 ((c >= 0x200C) && (c <= 0x200D)) ||
3183 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3184 ((c >= 0x2070) && (c <= 0x218F)) ||
3185 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3186 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3187 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3188 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3189 ((c >= 0x10000) && (c <= 0xEFFFF))))
3190 return(1);
3191 } else {
3192 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3193 (c == '.') || (c == '-') ||
3194 (c == '_') || (c == ':') ||
3195 (IS_COMBINING(c)) ||
3196 (IS_EXTENDER(c)))
3197 return(1);
3198 }
3199 return(0);
3200 }
3201
/* Forward declaration; the function is defined further down in this file. */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3204
3205 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3206 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3207 int len = 0, l;
3208 int c;
3209 int count = 0;
3210
3211 #ifdef DEBUG
3212 nbParseNameComplex++;
3213 #endif
3214
3215 /*
3216 * Handler for more complex cases
3217 */
3218 GROW;
3219 if (ctxt->instate == XML_PARSER_EOF)
3220 return(NULL);
3221 c = CUR_CHAR(l);
3222 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223 /*
3224 * Use the new checks of production [4] [4a] amd [5] of the
3225 * Update 5 of XML-1.0
3226 */
3227 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228 (!(((c >= 'a') && (c <= 'z')) ||
3229 ((c >= 'A') && (c <= 'Z')) ||
3230 (c == '_') || (c == ':') ||
3231 ((c >= 0xC0) && (c <= 0xD6)) ||
3232 ((c >= 0xD8) && (c <= 0xF6)) ||
3233 ((c >= 0xF8) && (c <= 0x2FF)) ||
3234 ((c >= 0x370) && (c <= 0x37D)) ||
3235 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236 ((c >= 0x200C) && (c <= 0x200D)) ||
3237 ((c >= 0x2070) && (c <= 0x218F)) ||
3238 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243 return(NULL);
3244 }
3245 len += l;
3246 NEXTL(l);
3247 c = CUR_CHAR(l);
3248 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249 (((c >= 'a') && (c <= 'z')) ||
3250 ((c >= 'A') && (c <= 'Z')) ||
3251 ((c >= '0') && (c <= '9')) || /* !start */
3252 (c == '_') || (c == ':') ||
3253 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254 ((c >= 0xC0) && (c <= 0xD6)) ||
3255 ((c >= 0xD8) && (c <= 0xF6)) ||
3256 ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258 ((c >= 0x370) && (c <= 0x37D)) ||
3259 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260 ((c >= 0x200C) && (c <= 0x200D)) ||
3261 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262 ((c >= 0x2070) && (c <= 0x218F)) ||
3263 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267 ((c >= 0x10000) && (c <= 0xEFFFF))
3268 )) {
3269 if (count++ > XML_PARSER_CHUNK_SIZE) {
3270 count = 0;
3271 GROW;
3272 if (ctxt->instate == XML_PARSER_EOF)
3273 return(NULL);
3274 }
3275 len += l;
3276 NEXTL(l);
3277 c = CUR_CHAR(l);
3278 }
3279 } else {
3280 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3281 (!IS_LETTER(c) && (c != '_') &&
3282 (c != ':'))) {
3283 return(NULL);
3284 }
3285 len += l;
3286 NEXTL(l);
3287 c = CUR_CHAR(l);
3288
3289 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3290 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3291 (c == '.') || (c == '-') ||
3292 (c == '_') || (c == ':') ||
3293 (IS_COMBINING(c)) ||
3294 (IS_EXTENDER(c)))) {
3295 if (count++ > XML_PARSER_CHUNK_SIZE) {
3296 count = 0;
3297 GROW;
3298 if (ctxt->instate == XML_PARSER_EOF)
3299 return(NULL);
3300 }
3301 len += l;
3302 NEXTL(l);
3303 c = CUR_CHAR(l);
3304 }
3305 }
3306 if ((len > XML_MAX_NAME_LENGTH) &&
3307 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3308 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309 return(NULL);
3310 }
3311 if (ctxt->input->cur - ctxt->input->base < len) {
3312 /*
3313 * There were a couple of bugs where PERefs lead to to a change
3314 * of the buffer. Check the buffer size to avoid passing an invalid
3315 * pointer to xmlDictLookup.
3316 */
3317 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318 "unexpected change of input buffer");
3319 return (NULL);
3320 }
3321 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3324 }
3325
3326 /**
3327 * xmlParseName:
3328 * @ctxt: an XML parser context
3329 *
3330 * parse an XML name.
3331 *
3332 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3333 * CombiningChar | Extender
3334 *
3335 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3336 *
3337 * [6] Names ::= Name (#x20 Name)*
3338 *
3339 * Returns the Name parsed or NULL
3340 */
3341
3342 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3343 xmlParseName(xmlParserCtxtPtr ctxt) {
3344 const xmlChar *in;
3345 const xmlChar *ret;
3346 int count = 0;
3347
3348 GROW;
3349
3350 #ifdef DEBUG
3351 nbParseName++;
3352 #endif
3353
3354 /*
3355 * Accelerator for simple ASCII names
3356 */
3357 in = ctxt->input->cur;
3358 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3359 ((*in >= 0x41) && (*in <= 0x5A)) ||
3360 (*in == '_') || (*in == ':')) {
3361 in++;
3362 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 ((*in >= 0x30) && (*in <= 0x39)) ||
3365 (*in == '_') || (*in == '-') ||
3366 (*in == ':') || (*in == '.'))
3367 in++;
3368 if ((*in > 0) && (*in < 0x80)) {
3369 count = in - ctxt->input->cur;
3370 if ((count > XML_MAX_NAME_LENGTH) &&
3371 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3372 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3373 return(NULL);
3374 }
3375 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3376 ctxt->input->cur = in;
3377 ctxt->input->col += count;
3378 if (ret == NULL)
3379 xmlErrMemory(ctxt, NULL);
3380 return(ret);
3381 }
3382 }
3383 /* accelerator for special cases */
3384 return(xmlParseNameComplex(ctxt));
3385 }
3386
3387 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3388 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3389 int len = 0, l;
3390 int c;
3391 int count = 0;
3392 size_t startPosition = 0;
3393
3394 #ifdef DEBUG
3395 nbParseNCNameComplex++;
3396 #endif
3397
3398 /*
3399 * Handler for more complex cases
3400 */
3401 GROW;
3402 startPosition = CUR_PTR - BASE_PTR;
3403 c = CUR_CHAR(l);
3404 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3405 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3406 return(NULL);
3407 }
3408
3409 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3410 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3411 if (count++ > XML_PARSER_CHUNK_SIZE) {
3412 if ((len > XML_MAX_NAME_LENGTH) &&
3413 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3414 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3415 return(NULL);
3416 }
3417 count = 0;
3418 GROW;
3419 if (ctxt->instate == XML_PARSER_EOF)
3420 return(NULL);
3421 }
3422 len += l;
3423 NEXTL(l);
3424 c = CUR_CHAR(l);
3425 if (c == 0) {
3426 count = 0;
3427 /*
3428 * when shrinking to extend the buffer we really need to preserve
3429 * the part of the name we already parsed. Hence rolling back
3430 * by current length.
3431 */
3432 ctxt->input->cur -= l;
3433 GROW;
3434 if (ctxt->instate == XML_PARSER_EOF)
3435 return(NULL);
3436 ctxt->input->cur += l;
3437 c = CUR_CHAR(l);
3438 }
3439 }
3440 if ((len > XML_MAX_NAME_LENGTH) &&
3441 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3442 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3443 return(NULL);
3444 }
3445 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3446 }
3447
3448 /**
3449 * xmlParseNCName:
3450 * @ctxt: an XML parser context
3451 * @len: length of the string parsed
3452 *
3453 * parse an XML name.
3454 *
3455 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3456 * CombiningChar | Extender
3457 *
3458 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3459 *
3460 * Returns the Name parsed or NULL
3461 */
3462
3463 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3464 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3465 const xmlChar *in, *e;
3466 const xmlChar *ret;
3467 int count = 0;
3468
3469 #ifdef DEBUG
3470 nbParseNCName++;
3471 #endif
3472
3473 /*
3474 * Accelerator for simple ASCII names
3475 */
3476 in = ctxt->input->cur;
3477 e = ctxt->input->end;
3478 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3479 ((*in >= 0x41) && (*in <= 0x5A)) ||
3480 (*in == '_')) && (in < e)) {
3481 in++;
3482 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483 ((*in >= 0x41) && (*in <= 0x5A)) ||
3484 ((*in >= 0x30) && (*in <= 0x39)) ||
3485 (*in == '_') || (*in == '-') ||
3486 (*in == '.')) && (in < e))
3487 in++;
3488 if (in >= e)
3489 goto complex;
3490 if ((*in > 0) && (*in < 0x80)) {
3491 count = in - ctxt->input->cur;
3492 if ((count > XML_MAX_NAME_LENGTH) &&
3493 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3494 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3495 return(NULL);
3496 }
3497 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3498 ctxt->input->cur = in;
3499 ctxt->input->col += count;
3500 if (ret == NULL) {
3501 xmlErrMemory(ctxt, NULL);
3502 }
3503 return(ret);
3504 }
3505 }
3506 complex:
3507 return(xmlParseNCNameComplex(ctxt));
3508 }
3509
3510 /**
3511 * xmlParseNameAndCompare:
3512 * @ctxt: an XML parser context
3513 *
3514 * parse an XML name and compares for match
3515 * (specialized for endtag parsing)
3516 *
3517 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3518 * and the name for mismatch
3519 */
3520
3521 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3522 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3523 register const xmlChar *cmp = other;
3524 register const xmlChar *in;
3525 const xmlChar *ret;
3526
3527 GROW;
3528 if (ctxt->instate == XML_PARSER_EOF)
3529 return(NULL);
3530
3531 in = ctxt->input->cur;
3532 while (*in != 0 && *in == *cmp) {
3533 ++in;
3534 ++cmp;
3535 }
3536 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3537 /* success */
3538 ctxt->input->col += in - ctxt->input->cur;
3539 ctxt->input->cur = in;
3540 return (const xmlChar*) 1;
3541 }
3542 /* failure (or end of input buffer), check with full function */
3543 ret = xmlParseName (ctxt);
3544 /* strings coming from the dictionary direct compare possible */
3545 if (ret == other) {
3546 return (const xmlChar*) 1;
3547 }
3548 return ret;
3549 }
3550
3551 /**
3552 * xmlParseStringName:
3553 * @ctxt: an XML parser context
3554 * @str: a pointer to the string pointer (IN/OUT)
3555 *
3556 * parse an XML name.
3557 *
3558 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3559 * CombiningChar | Extender
3560 *
3561 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3562 *
3563 * [6] Names ::= Name (#x20 Name)*
3564 *
3565 * Returns the Name parsed or NULL. The @str pointer
3566 * is updated to the current location in the string.
3567 */
3568
3569 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3570 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3571 xmlChar buf[XML_MAX_NAMELEN + 5];
3572 const xmlChar *cur = *str;
3573 int len = 0, l;
3574 int c;
3575
3576 #ifdef DEBUG
3577 nbParseStringName++;
3578 #endif
3579
3580 c = CUR_SCHAR(cur, l);
3581 if (!xmlIsNameStartChar(ctxt, c)) {
3582 return(NULL);
3583 }
3584
3585 COPY_BUF(l,buf,len,c);
3586 cur += l;
3587 c = CUR_SCHAR(cur, l);
3588 while (xmlIsNameChar(ctxt, c)) {
3589 COPY_BUF(l,buf,len,c);
3590 cur += l;
3591 c = CUR_SCHAR(cur, l);
3592 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3593 /*
3594 * Okay someone managed to make a huge name, so he's ready to pay
3595 * for the processing speed.
3596 */
3597 xmlChar *buffer;
3598 int max = len * 2;
3599
3600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3601 if (buffer == NULL) {
3602 xmlErrMemory(ctxt, NULL);
3603 return(NULL);
3604 }
3605 memcpy(buffer, buf, len);
3606 while (xmlIsNameChar(ctxt, c)) {
3607 if (len + 10 > max) {
3608 xmlChar *tmp;
3609
3610 if ((len > XML_MAX_NAME_LENGTH) &&
3611 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3612 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613 xmlFree(buffer);
3614 return(NULL);
3615 }
3616 max *= 2;
3617 tmp = (xmlChar *) xmlRealloc(buffer,
3618 max * sizeof(xmlChar));
3619 if (tmp == NULL) {
3620 xmlErrMemory(ctxt, NULL);
3621 xmlFree(buffer);
3622 return(NULL);
3623 }
3624 buffer = tmp;
3625 }
3626 COPY_BUF(l,buffer,len,c);
3627 cur += l;
3628 c = CUR_SCHAR(cur, l);
3629 }
3630 buffer[len] = 0;
3631 *str = cur;
3632 return(buffer);
3633 }
3634 }
3635 if ((len > XML_MAX_NAME_LENGTH) &&
3636 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3637 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638 return(NULL);
3639 }
3640 *str = cur;
3641 return(xmlStrndup(buf, len));
3642 }
3643
3644 /**
3645 * xmlParseNmtoken:
3646 * @ctxt: an XML parser context
3647 *
3648 * parse an XML Nmtoken.
3649 *
3650 * [7] Nmtoken ::= (NameChar)+
3651 *
3652 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3653 *
3654 * Returns the Nmtoken parsed or NULL
3655 */
3656
3657 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3658 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3659 xmlChar buf[XML_MAX_NAMELEN + 5];
3660 int len = 0, l;
3661 int c;
3662 int count = 0;
3663
3664 #ifdef DEBUG
3665 nbParseNmToken++;
3666 #endif
3667
3668 GROW;
3669 if (ctxt->instate == XML_PARSER_EOF)
3670 return(NULL);
3671 c = CUR_CHAR(l);
3672
3673 while (xmlIsNameChar(ctxt, c)) {
3674 if (count++ > XML_PARSER_CHUNK_SIZE) {
3675 count = 0;
3676 GROW;
3677 }
3678 COPY_BUF(l,buf,len,c);
3679 NEXTL(l);
3680 c = CUR_CHAR(l);
3681 if (c == 0) {
3682 count = 0;
3683 GROW;
3684 if (ctxt->instate == XML_PARSER_EOF)
3685 return(NULL);
3686 c = CUR_CHAR(l);
3687 }
3688 if (len >= XML_MAX_NAMELEN) {
3689 /*
3690 * Okay someone managed to make a huge token, so he's ready to pay
3691 * for the processing speed.
3692 */
3693 xmlChar *buffer;
3694 int max = len * 2;
3695
3696 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3697 if (buffer == NULL) {
3698 xmlErrMemory(ctxt, NULL);
3699 return(NULL);
3700 }
3701 memcpy(buffer, buf, len);
3702 while (xmlIsNameChar(ctxt, c)) {
3703 if (count++ > XML_PARSER_CHUNK_SIZE) {
3704 count = 0;
3705 GROW;
3706 if (ctxt->instate == XML_PARSER_EOF) {
3707 xmlFree(buffer);
3708 return(NULL);
3709 }
3710 }
3711 if (len + 10 > max) {
3712 xmlChar *tmp;
3713
3714 if ((max > XML_MAX_NAME_LENGTH) &&
3715 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3716 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3717 xmlFree(buffer);
3718 return(NULL);
3719 }
3720 max *= 2;
3721 tmp = (xmlChar *) xmlRealloc(buffer,
3722 max * sizeof(xmlChar));
3723 if (tmp == NULL) {
3724 xmlErrMemory(ctxt, NULL);
3725 xmlFree(buffer);
3726 return(NULL);
3727 }
3728 buffer = tmp;
3729 }
3730 COPY_BUF(l,buffer,len,c);
3731 NEXTL(l);
3732 c = CUR_CHAR(l);
3733 }
3734 buffer[len] = 0;
3735 return(buffer);
3736 }
3737 }
3738 if (len == 0)
3739 return(NULL);
3740 if ((len > XML_MAX_NAME_LENGTH) &&
3741 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3742 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743 return(NULL);
3744 }
3745 return(xmlStrndup(buf, len));
3746 }
3747
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *	               "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The function works in three steps: the quoted literal is copied to a
 * buffer, the copy is checked for '%' and '&' which do not start a
 * reference, and finally the parameter entity references of the copy are
 * substituted with xmlStringDecodeEntities().
 *
 * When @orig is non-NULL and the first two steps succeeded, *@orig
 * receives the buffer holding the unsubstituted value and this function
 * does not free it; this happens even if the substitution returned NULL.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote selects the character closing the literal */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember the input the literal started in, see the NOTE below */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* keep room for the next character and the final zero */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                goto error;
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);

        GROW;
        c = CUR_CHAR(l);
        /* a zero character triggers one more attempt to get data */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    /* the loop ended on something else than the closing quote */
    if (c != stop) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        goto error;
    }
    NEXT;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;
            int nameOk = 0;

            cur++;
            /* the name is only needed for checking, not for its value */
            name = xmlParseStringName(ctxt, &cur);
            if (name != NULL) {
                nameOk = 1;
                xmlFree(name);
            }
            /* a reference is a name immediately followed by ';' */
            if ((nameOk == 0) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
                goto error;
            }
            /*
             * a parameter entity reference is refused here when
             * inSubset is 1 and only one input is on the stack
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
                goto error;
            }
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     *
     * NOTE: 4.4.7 Bypassed
     * When a general entity reference appears in the EntityValue in
     * an entity declaration, it is bypassed and left as is.
     * so XML_SUBSTITUTE_REF is not set here.
     */
    ++ctxt->depth;
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                  0, 0, 0);
    --ctxt->depth;
    /* hand the unsubstituted buffer over, it must not be freed below */
    if (orig != NULL) {
        *orig = buf;
        buf = NULL;
    }

error:
    /* also reached on success, ret is still NULL on the error paths */
    if (buf != NULL)
        xmlFree(buf);
    return(ret);
}
3893
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length of the returned value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * Whitespace characters (#x20, #xD, #xA, #x9) are always copied as #x20.
 * When @normalize is set, leading and trailing spaces are dropped and
 * runs of whitespace are reduced to a single space.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the opening quote selects the character closing the value */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     * NOTE(review): growBuffer() is a macro defined outside of this
     * function; it presumably enlarges buf, updates buf_size and jumps
     * to mem_error on failure - confirm against its definition.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         * NOTE(review): this path jumps to mem_error, so xmlErrMemory()
         * is called too although no allocation failed - confirm that
         * this is intended.
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            goto mem_error;
        }
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /*
                     * predefined entity: copy the first character of its
                     * content; '&' is kept as "&#38;" when entities are
                     * not replaced
                     */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    /* entity to be replaced by its decoded content */
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        --ctxt->depth;
                        if (rep != NULL) {
                            current = rep;
                            /*
                             * copy the replacement text, whitespace
                             * characters being turned into spaces; the
                             * room left is checked after each byte
                             */
                            while (*current != 0) { /* non input consuming */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    /* entities are not replaced: keep the reference */
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities, diff;

                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);
                        --ctxt->depth;

                        /*
                         * ent->checked stores twice the number of entities
                         * counted while decoding (capped), its lowest bit
                         * is set below when the content holds a '<'
                         */
                        diff = ctxt->nbentities - oldnbent + 1;
                        if (diff > INT_MAX / 2)
                            diff = INT_MAX / 2;
                        ent->checked = diff * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        } else {
                            /* decoding failed: empty the entity content */
                            ent->content[0] = 0;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * whitespace: with normalization, spaces at the start of
                 * the value and spaces following a space are not copied
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* with normalization, remove the trailing spaces */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * report why the loop ended if not on the closing quote; the value
     * collected so far is still returned in these cases
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     * NOTE(review): as above this path also ends in xmlErrMemory().
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    /* release the value and a pending replacement text, if any */
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4144
4145 /**
4146 * xmlParseAttValue:
4147 * @ctxt: an XML parser context
4148 *
4149 * parse a value for an attribute
4150 * Note: the parser won't do substitution of entities here, this
4151 * will be handled later in xmlStringGetNodeList
4152 *
4153 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4154 * "'" ([^<&'] | Reference)* "'"
4155 *
4156 * 3.3.3 Attribute-Value Normalization:
4157 * Before the value of an attribute is passed to the application or
4158 * checked for validity, the XML processor must normalize it as follows:
4159 * - a character reference is processed by appending the referenced
4160 * character to the attribute value
4161 * - an entity reference is processed by recursively processing the
4162 * replacement text of the entity
4163 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4164 * appending #x20 to the normalized value, except that only a single
4165 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4166 * parsed entity or the literal entity value of an internal parsed entity
4167 * - other characters are processed by appending them to the normalized value
4168 * If the declared value is not CDATA, then the XML processor must further
4169 * process the normalized attribute value by discarding any leading and
4170 * trailing space (#x20) characters, and by replacing sequences of space
4171 * (#x20) characters by a single space (#x20) character.
4172 * All attributes for which no declaration has been read should be treated
4173 * by a non-validating parser as if declared CDATA.
4174 *
4175 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4176 */
4177
4178
4179 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4180 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4181 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4182 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4183 }
4184
/**
 * xmlParseSystemLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML Literal
 *
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 *
 * The parser state is set to XML_PARSER_SYSTEM_LITERAL while the literal
 * is read and set back to its previous value afterwards, except when the
 * parser reached the XML_PARSER_EOF state.
 *
 * Returns the SystemLiteral parsed or NULL. The result is a newly
 * allocated string; it is also returned when the closing quote is
 * missing, after XML_ERR_LITERAL_NOT_FINISHED was raised.
 */

xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int cur, l;
    xmlChar stop;
    int state = ctxt->instate;
    int count = 0;

    SHRINK;
    /* the opening quote selects the character closing the literal */
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }

    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
    cur = CUR_CHAR(l);
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
        /* keep room for the next character and the final zero */
        if (len + 5 >= size) {
            xmlChar *tmp;

            /* the buffer is not doubled beyond the limit without HUGE */
            if ((size > XML_MAX_NAME_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
                xmlFree(buf);
                ctxt->instate = (xmlParserInputState) state;
                return(NULL);
            }
            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                ctxt->instate = (xmlParserInputState) state;
                return(NULL);
            }
            buf = tmp;
        }
        /* refresh the input buffer every 50 characters */
        count++;
        if (count > 50) {
            SHRINK;
            GROW;
            count = 0;
            if (ctxt->instate == XML_PARSER_EOF) {
                xmlFree(buf);
                return(NULL);
            }
        }
        COPY_BUF(l,buf,len,cur);
        NEXTL(l);
        cur = CUR_CHAR(l);
        /* a zero character triggers one more attempt to get data */
        if (cur == 0) {
            GROW;
            SHRINK;
            cur = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    ctxt->instate = (xmlParserInputState) state;
    /*
     * the loop ends either on the closing quote, which is consumed here,
     * or on a character rejected by IS_CHAR
     */
    if (!IS_CHAR(cur)) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        NEXT;
    }
    return(buf);
}
4274
4275 /**
4276 * xmlParsePubidLiteral:
4277 * @ctxt: an XML parser context
4278 *
4279 * parse an XML public literal
4280 *
4281 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4282 *
4283 * Returns the PubidLiteral parsed or NULL.
4284 */
4285
4286 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4287 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4288 xmlChar *buf = NULL;
4289 int len = 0;
4290 int size = XML_PARSER_BUFFER_SIZE;
4291 xmlChar cur;
4292 xmlChar stop;
4293 int count = 0;
4294 xmlParserInputState oldstate = ctxt->instate;
4295
4296 SHRINK;
4297 if (RAW == '"') {
4298 NEXT;
4299 stop = '"';
4300 } else if (RAW == '\'') {
4301 NEXT;
4302 stop = '\'';
4303 } else {
4304 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4305 return(NULL);
4306 }
4307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4308 if (buf == NULL) {
4309 xmlErrMemory(ctxt, NULL);
4310 return(NULL);
4311 }
4312 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4313 cur = CUR;
4314 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4315 if (len + 1 >= size) {
4316 xmlChar *tmp;
4317
4318 if ((size > XML_MAX_NAME_LENGTH) &&
4319 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4320 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4321 xmlFree(buf);
4322 return(NULL);
4323 }
4324 size *= 2;
4325 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4326 if (tmp == NULL) {
4327 xmlErrMemory(ctxt, NULL);
4328 xmlFree(buf);
4329 return(NULL);
4330 }
4331 buf = tmp;
4332 }
4333 buf[len++] = cur;
4334 count++;
4335 if (count > 50) {
4336 SHRINK;
4337 GROW;
4338 count = 0;
4339 if (ctxt->instate == XML_PARSER_EOF) {
4340 xmlFree(buf);
4341 return(NULL);
4342 }
4343 }
4344 NEXT;
4345 cur = CUR;
4346 if (cur == 0) {
4347 GROW;
4348 SHRINK;
4349 cur = CUR;
4350 }
4351 }
4352 buf[len] = 0;
4353 if (cur != stop) {
4354 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4355 } else {
4356 NEXT;
4357 }
4358 ctxt->instate = oldstate;
4359 return(buf);
4360 }
4361
4362 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4363
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by a byte value. An entry is equal to its own
 * index for the bytes 0x09 and 0x20 to 0x7F, with the exception of
 * '&' (0x26), '<' (0x3C) and ']' (0x5D). All the other entries are zero:
 * these three characters, the control characters below 0x20 other than
 * 0x09 (which includes 0x0A and 0x0D) and every byte from 0x80 to 0xFF.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4401
4402 /**
4403 * xmlParseCharData:
4404 * @ctxt: an XML parser context
4405 * @cdata: int indicating whether we are within a CDATA section
4406 *
4407 * parse a CharData section.
4408 * if we are within a CDATA section ']]>' marks an end of section.
4409 *
4410 * The right angle bracket (>) may be represented using the string ">",
4411 * and must, for compatibility, be escaped using ">" or a character
4412 * reference when it appears in the string "]]>" in content, when that
4413 * string is not marking the end of a CDATA section.
4414 *
4415 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4416 */
4417
4418 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4419 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4420 const xmlChar *in;
4421 int nbchar = 0;
4422 int line = ctxt->input->line;
4423 int col = ctxt->input->col;
4424 int ccol;
4425
4426 SHRINK;
4427 GROW;
4428 /*
4429 * Accelerated common case where input don't need to be
4430 * modified before passing it to the handler.
4431 */
4432 if (!cdata) {
4433 in = ctxt->input->cur;
4434 do {
4435 get_more_space:
4436 while (*in == 0x20) { in++; ctxt->input->col++; }
4437 if (*in == 0xA) {
4438 do {
4439 ctxt->input->line++; ctxt->input->col = 1;
4440 in++;
4441 } while (*in == 0xA);
4442 goto get_more_space;
4443 }
4444 if (*in == '<') {
4445 nbchar = in - ctxt->input->cur;
4446 if (nbchar > 0) {
4447 const xmlChar *tmp = ctxt->input->cur;
4448 ctxt->input->cur = in;
4449
4450 if ((ctxt->sax != NULL) &&
4451 (ctxt->sax->ignorableWhitespace !=
4452 ctxt->sax->characters)) {
4453 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4454 if (ctxt->sax->ignorableWhitespace != NULL)
4455 ctxt->sax->ignorableWhitespace(ctxt->userData,
4456 tmp, nbchar);
4457 } else {
4458 if (ctxt->sax->characters != NULL)
4459 ctxt->sax->characters(ctxt->userData,
4460 tmp, nbchar);
4461 if (*ctxt->space == -1)
4462 *ctxt->space = -2;
4463 }
4464 } else if ((ctxt->sax != NULL) &&
4465 (ctxt->sax->characters != NULL)) {
4466 ctxt->sax->characters(ctxt->userData,
4467 tmp, nbchar);
4468 }
4469 }
4470 return;
4471 }
4472
4473 get_more:
4474 ccol = ctxt->input->col;
4475 while (test_char_data[*in]) {
4476 in++;
4477 ccol++;
4478 }
4479 ctxt->input->col = ccol;
4480 if (*in == 0xA) {
4481 do {
4482 ctxt->input->line++; ctxt->input->col = 1;
4483 in++;
4484 } while (*in == 0xA);
4485 goto get_more;
4486 }
4487 if (*in == ']') {
4488 if ((in[1] == ']') && (in[2] == '>')) {
4489 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4490 ctxt->input->cur = in + 1;
4491 return;
4492 }
4493 in++;
4494 ctxt->input->col++;
4495 goto get_more;
4496 }
4497 nbchar = in - ctxt->input->cur;
4498 if (nbchar > 0) {
4499 if ((ctxt->sax != NULL) &&
4500 (ctxt->sax->ignorableWhitespace !=
4501 ctxt->sax->characters) &&
4502 (IS_BLANK_CH(*ctxt->input->cur))) {
4503 const xmlChar *tmp = ctxt->input->cur;
4504 ctxt->input->cur = in;
4505
4506 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4507 if (ctxt->sax->ignorableWhitespace != NULL)
4508 ctxt->sax->ignorableWhitespace(ctxt->userData,
4509 tmp, nbchar);
4510 } else {
4511 if (ctxt->sax->characters != NULL)
4512 ctxt->sax->characters(ctxt->userData,
4513 tmp, nbchar);
4514 if (*ctxt->space == -1)
4515 *ctxt->space = -2;
4516 }
4517 line = ctxt->input->line;
4518 col = ctxt->input->col;
4519 } else if (ctxt->sax != NULL) {
4520 if (ctxt->sax->characters != NULL)
4521 ctxt->sax->characters(ctxt->userData,
4522 ctxt->input->cur, nbchar);
4523 line = ctxt->input->line;
4524 col = ctxt->input->col;
4525 }
4526 /* something really bad happened in the SAX callback */
4527 if (ctxt->instate != XML_PARSER_CONTENT)
4528 return;
4529 }
4530 ctxt->input->cur = in;
4531 if (*in == 0xD) {
4532 in++;
4533 if (*in == 0xA) {
4534 ctxt->input->cur = in;
4535 in++;
4536 ctxt->input->line++; ctxt->input->col = 1;
4537 continue; /* while */
4538 }
4539 in--;
4540 }
4541 if (*in == '<') {
4542 return;
4543 }
4544 if (*in == '&') {
4545 return;
4546 }
4547 SHRINK;
4548 GROW;
4549 if (ctxt->instate == XML_PARSER_EOF)
4550 return;
4551 in = ctxt->input->cur;
4552 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4553 nbchar = 0;
4554 }
4555 ctxt->input->line = line;
4556 ctxt->input->col = col;
4557 xmlParseCharDataComplex(ctxt, cdata);
4558 }
4559
4560 /**
4561 * xmlParseCharDataComplex:
4562 * @ctxt: an XML parser context
4563 * @cdata: int indicating whether we are within a CDATA section
4564 *
4565 * parse a CharData section.this is the fallback function
4566 * of xmlParseCharData() when the parsing requires handling
4567 * of non-ASCII characters.
4568 */
4569 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4570 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4571 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4572 int nbchar = 0;
4573 int cur, l;
4574 int count = 0;
4575
4576 SHRINK;
4577 GROW;
4578 cur = CUR_CHAR(l);
4579 while ((cur != '<') && /* checked */
4580 (cur != '&') &&
4581 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4582 if ((cur == ']') && (NXT(1) == ']') &&
4583 (NXT(2) == '>')) {
4584 if (cdata) break;
4585 else {
4586 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4587 }
4588 }
4589 COPY_BUF(l,buf,nbchar,cur);
4590 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4591 buf[nbchar] = 0;
4592
4593 /*
4594 * OK the segment is to be consumed as chars.
4595 */
4596 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4597 if (areBlanks(ctxt, buf, nbchar, 0)) {
4598 if (ctxt->sax->ignorableWhitespace != NULL)
4599 ctxt->sax->ignorableWhitespace(ctxt->userData,
4600 buf, nbchar);
4601 } else {
4602 if (ctxt->sax->characters != NULL)
4603 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4604 if ((ctxt->sax->characters !=
4605 ctxt->sax->ignorableWhitespace) &&
4606 (*ctxt->space == -1))
4607 *ctxt->space = -2;
4608 }
4609 }
4610 nbchar = 0;
4611 /* something really bad happened in the SAX callback */
4612 if (ctxt->instate != XML_PARSER_CONTENT)
4613 return;
4614 }
4615 count++;
4616 if (count > 50) {
4617 SHRINK;
4618 GROW;
4619 count = 0;
4620 if (ctxt->instate == XML_PARSER_EOF)
4621 return;
4622 }
4623 NEXTL(l);
4624 cur = CUR_CHAR(l);
4625 }
4626 if (nbchar != 0) {
4627 buf[nbchar] = 0;
4628 /*
4629 * OK the segment is to be consumed as chars.
4630 */
4631 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4632 if (areBlanks(ctxt, buf, nbchar, 0)) {
4633 if (ctxt->sax->ignorableWhitespace != NULL)
4634 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4635 } else {
4636 if (ctxt->sax->characters != NULL)
4637 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4638 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4639 (*ctxt->space == -1))
4640 *ctxt->space = -2;
4641 }
4642 }
4643 }
4644 if ((cur != 0) && (!IS_CHAR(cur))) {
4645 /* Generate the error and skip the offending character */
4646 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4647 "PCDATA invalid Char value %d\n",
4648 cur);
4649 NEXTL(l);
4650 }
4651 }
4652
4653 /**
4654 * xmlParseExternalID:
4655 * @ctxt: an XML parser context
4656 * @publicID: a xmlChar** receiving PubidLiteral
4657 * @strict: indicate whether we should restrict parsing to only
4658 * production [75], see NOTE below
4659 *
4660 * Parse an External ID or a Public ID
4661 *
4662 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4663 * 'PUBLIC' S PubidLiteral S SystemLiteral
4664 *
4665 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4666 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4667 *
4668 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4669 *
4670 * Returns the function returns SystemLiteral and in the second
4671 * case publicID receives PubidLiteral, is strict is off
4672 * it is possible to return NULL and have publicID set.
4673 */
4674
4675 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4676 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4677 xmlChar *URI = NULL;
4678
4679 SHRINK;
4680
4681 *publicID = NULL;
4682 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4683 SKIP(6);
4684 if (SKIP_BLANKS == 0) {
4685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4686 "Space required after 'SYSTEM'\n");
4687 }
4688 URI = xmlParseSystemLiteral(ctxt);
4689 if (URI == NULL) {
4690 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4691 }
4692 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4693 SKIP(6);
4694 if (SKIP_BLANKS == 0) {
4695 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4696 "Space required after 'PUBLIC'\n");
4697 }
4698 *publicID = xmlParsePubidLiteral(ctxt);
4699 if (*publicID == NULL) {
4700 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4701 }
4702 if (strict) {
4703 /*
4704 * We don't handle [83] so "S SystemLiteral" is required.
4705 */
4706 if (SKIP_BLANKS == 0) {
4707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4708 "Space required after the Public Identifier\n");
4709 }
4710 } else {
4711 /*
4712 * We handle [83] so we return immediately, if
4713 * "S SystemLiteral" is not detected. We skip blanks if no
4714 * system literal was found, but this is harmless since we must
4715 * be at the end of a NotationDecl.
4716 */
4717 if (SKIP_BLANKS == 0) return(NULL);
4718 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4719 }
4720 URI = xmlParseSystemLiteral(ctxt);
4721 if (URI == NULL) {
4722 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4723 }
4724 }
4725 return(URI);
4726 }
4727
4728 /**
4729 * xmlParseCommentComplex:
4730 * @ctxt: an XML parser context
4731 * @buf: the already parsed part of the buffer
4732 * @len: number of bytes in the buffer
4733 * @size: allocated size of the buffer
4734 *
4735 * Skip an XML (SGML) comment <!-- .... -->
4736 * The spec says that "For compatibility, the string "--" (double-hyphen)
4737 * must not occur within comments. "
4738 * This is the slow routine in case the accelerator for ascii didn't work
4739 *
4740 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4741 */
4742 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4743 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4744 size_t len, size_t size) {
4745 int q, ql;
4746 int r, rl;
4747 int cur, l;
4748 size_t count = 0;
4749 int inputid;
4750
4751 inputid = ctxt->input->id;
4752
4753 if (buf == NULL) {
4754 len = 0;
4755 size = XML_PARSER_BUFFER_SIZE;
4756 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4757 if (buf == NULL) {
4758 xmlErrMemory(ctxt, NULL);
4759 return;
4760 }
4761 }
4762 GROW; /* Assure there's enough input data */
4763 q = CUR_CHAR(ql);
4764 if (q == 0)
4765 goto not_terminated;
4766 if (!IS_CHAR(q)) {
4767 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4768 "xmlParseComment: invalid xmlChar value %d\n",
4769 q);
4770 xmlFree (buf);
4771 return;
4772 }
4773 NEXTL(ql);
4774 r = CUR_CHAR(rl);
4775 if (r == 0)
4776 goto not_terminated;
4777 if (!IS_CHAR(r)) {
4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779 "xmlParseComment: invalid xmlChar value %d\n",
4780 q);
4781 xmlFree (buf);
4782 return;
4783 }
4784 NEXTL(rl);
4785 cur = CUR_CHAR(l);
4786 if (cur == 0)
4787 goto not_terminated;
4788 while (IS_CHAR(cur) && /* checked */
4789 ((cur != '>') ||
4790 (r != '-') || (q != '-'))) {
4791 if ((r == '-') && (q == '-')) {
4792 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4793 }
4794 if ((len > XML_MAX_TEXT_LENGTH) &&
4795 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4796 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797 "Comment too big found", NULL);
4798 xmlFree (buf);
4799 return;
4800 }
4801 if (len + 5 >= size) {
4802 xmlChar *new_buf;
4803 size_t new_size;
4804
4805 new_size = size * 2;
4806 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4807 if (new_buf == NULL) {
4808 xmlFree (buf);
4809 xmlErrMemory(ctxt, NULL);
4810 return;
4811 }
4812 buf = new_buf;
4813 size = new_size;
4814 }
4815 COPY_BUF(ql,buf,len,q);
4816 q = r;
4817 ql = rl;
4818 r = cur;
4819 rl = l;
4820
4821 count++;
4822 if (count > 50) {
4823 SHRINK;
4824 GROW;
4825 count = 0;
4826 if (ctxt->instate == XML_PARSER_EOF) {
4827 xmlFree(buf);
4828 return;
4829 }
4830 }
4831 NEXTL(l);
4832 cur = CUR_CHAR(l);
4833 if (cur == 0) {
4834 SHRINK;
4835 GROW;
4836 cur = CUR_CHAR(l);
4837 }
4838 }
4839 buf[len] = 0;
4840 if (cur == 0) {
4841 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4842 "Comment not terminated \n<!--%.50s\n", buf);
4843 } else if (!IS_CHAR(cur)) {
4844 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845 "xmlParseComment: invalid xmlChar value %d\n",
4846 cur);
4847 } else {
4848 if (inputid != ctxt->input->id) {
4849 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4850 "Comment doesn't start and stop in the same"
4851 " entity\n");
4852 }
4853 NEXT;
4854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855 (!ctxt->disableSAX))
4856 ctxt->sax->comment(ctxt->userData, buf);
4857 }
4858 xmlFree(buf);
4859 return;
4860 not_terminated:
4861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862 "Comment not terminated\n", NULL);
4863 xmlFree(buf);
4864 return;
4865 }
4866
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * Returns immediately, consuming nothing, if the input is not positioned
 * on "<!--". Otherwise an accelerated loop scans ASCII content directly
 * in the input buffer, accumulating it in a heap buffer when a SAX
 * comment() callback is installed. When the closing "-->" is found the
 * text is reported to that callback. Input the fast loop cannot handle
 * is passed, together with the buffer built so far, to
 * xmlParseCommentComplex().
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    /* Remember the state to restore it once the comment is parsed. */
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    /* Used to detect a comment ending in a different input. */
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        /*
         * Skip TAB and every byte in 0x20..0x7F other than '-',
         * counting columns in a local variable.
         */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
                    in++;
                    ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         * (only when a comment() callback exists to receive it)
         */
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * First chunk: when a double hyphen follows, size the
                     * buffer to exactly this chunk plus the terminator,
                     * otherwise leave XML_PARSER_BUFFER_SIZE of headroom.
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    /* Grow the buffer; new_buf keeps buf valid on failure. */
                    xmlChar *new_buf;
                    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /*
         * NOTE(review): unlike the allocation failure paths above, this
         * return leaves ctxt->instate set to XML_PARSER_COMMENT - confirm
         * this is intended.
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                         "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
        /* Commit what has been scanned so far. */
        ctxt->input->cur = in;
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            /*
             * CR LF: move the cursor onto the LF and restart the loop;
             * a lone CR is left for the loop condition to reject.
             */
            in++;
            if (*in == 0xA) {
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            in--;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF) {
            xmlFree(buf);
            return;
        }
        /* SHRINK/GROW may have moved the buffer: reload the cursor. */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->": end of the comment. */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                       "comment doesn't start and stop in the"
                                       " same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        /* An empty comment is reported as "". */
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    if (ctxt->instate != XML_PARSER_EOF)
                        ctxt->instate = state;
                    return;
                }
                /* "--" not followed by '>': report it, then skip it. */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                /* The error report may have stopped the parser. */
                if (ctxt->instate == XML_PARSER_EOF) {
                    xmlFree(buf);
                    return;
                }
                in++;
                ctxt->input->col++;
            }
            /*
             * The hyphen(s) stepped over here stay between input->cur
             * and 'in', so they are copied with the next chunk.
             */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* Not handled by the fast loop: continue with the slow routine. */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
5040
5041
5042 /**
5043 * xmlParsePITarget:
5044 * @ctxt: an XML parser context
5045 *
5046 * parse the name of a PI
5047 *
5048 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5049 *
5050 * Returns the PITarget name or NULL
5051 */
5052
5053 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5054 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5055 const xmlChar *name;
5056
5057 name = xmlParseName(ctxt);
5058 if ((name != NULL) &&
5059 ((name[0] == 'x') || (name[0] == 'X')) &&
5060 ((name[1] == 'm') || (name[1] == 'M')) &&
5061 ((name[2] == 'l') || (name[2] == 'L'))) {
5062 int i;
5063 if ((name[0] == 'x') && (name[1] == 'm') &&
5064 (name[2] == 'l') && (name[3] == 0)) {
5065 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5066 "XML declaration allowed only at the start of the document\n");
5067 return(name);
5068 } else if (name[3] == 0) {
5069 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5070 return(name);
5071 }
5072 for (i = 0;;i++) {
5073 if (xmlW3CPIs[i] == NULL) break;
5074 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5075 return(name);
5076 }
5077 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5078 "xmlParsePITarget: invalid name prefix 'xml'\n",
5079 NULL, NULL);
5080 }
5081 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5082 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5083 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5084 }
5085 return(name);
5086 }
5087
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected value is: optional blanks, "catalog", optional blanks,
 * '=', optional blanks, a quoted URL, optional blanks. A missing '='
 * makes the function return silently; every other deviation produces
 * an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto syntax_error;
    p++;

    /* Find the matching closing quote. */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the quoted value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5149
5150 /**
5151 * xmlParsePI:
5152 * @ctxt: an XML parser context
5153 *
5154 * parse an XML Processing Instruction.
5155 *
5156 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5157 *
5158 * The processing is transferred to SAX once parsed.
5159 */
5160
5161 void
xmlParsePI(xmlParserCtxtPtr ctxt)5162 xmlParsePI(xmlParserCtxtPtr ctxt) {
5163 xmlChar *buf = NULL;
5164 size_t len = 0;
5165 size_t size = XML_PARSER_BUFFER_SIZE;
5166 int cur, l;
5167 const xmlChar *target;
5168 xmlParserInputState state;
5169 int count = 0;
5170
5171 if ((RAW == '<') && (NXT(1) == '?')) {
5172 int inputid = ctxt->input->id;
5173 state = ctxt->instate;
5174 ctxt->instate = XML_PARSER_PI;
5175 /*
5176 * this is a Processing Instruction.
5177 */
5178 SKIP(2);
5179 SHRINK;
5180
5181 /*
5182 * Parse the target name and check for special support like
5183 * namespace.
5184 */
5185 target = xmlParsePITarget(ctxt);
5186 if (target != NULL) {
5187 if ((RAW == '?') && (NXT(1) == '>')) {
5188 if (inputid != ctxt->input->id) {
5189 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5190 "PI declaration doesn't start and stop in"
5191 " the same entity\n");
5192 }
5193 SKIP(2);
5194
5195 /*
5196 * SAX: PI detected.
5197 */
5198 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5199 (ctxt->sax->processingInstruction != NULL))
5200 ctxt->sax->processingInstruction(ctxt->userData,
5201 target, NULL);
5202 if (ctxt->instate != XML_PARSER_EOF)
5203 ctxt->instate = state;
5204 return;
5205 }
5206 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5207 if (buf == NULL) {
5208 xmlErrMemory(ctxt, NULL);
5209 ctxt->instate = state;
5210 return;
5211 }
5212 if (SKIP_BLANKS == 0) {
5213 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5214 "ParsePI: PI %s space expected\n", target);
5215 }
5216 cur = CUR_CHAR(l);
5217 while (IS_CHAR(cur) && /* checked */
5218 ((cur != '?') || (NXT(1) != '>'))) {
5219 if (len + 5 >= size) {
5220 xmlChar *tmp;
5221 size_t new_size = size * 2;
5222 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5223 if (tmp == NULL) {
5224 xmlErrMemory(ctxt, NULL);
5225 xmlFree(buf);
5226 ctxt->instate = state;
5227 return;
5228 }
5229 buf = tmp;
5230 size = new_size;
5231 }
5232 count++;
5233 if (count > 50) {
5234 SHRINK;
5235 GROW;
5236 if (ctxt->instate == XML_PARSER_EOF) {
5237 xmlFree(buf);
5238 return;
5239 }
5240 count = 0;
5241 if ((len > XML_MAX_TEXT_LENGTH) &&
5242 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5243 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5244 "PI %s too big found", target);
5245 xmlFree(buf);
5246 ctxt->instate = state;
5247 return;
5248 }
5249 }
5250 COPY_BUF(l,buf,len,cur);
5251 NEXTL(l);
5252 cur = CUR_CHAR(l);
5253 if (cur == 0) {
5254 SHRINK;
5255 GROW;
5256 cur = CUR_CHAR(l);
5257 }
5258 }
5259 if ((len > XML_MAX_TEXT_LENGTH) &&
5260 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5261 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5262 "PI %s too big found", target);
5263 xmlFree(buf);
5264 ctxt->instate = state;
5265 return;
5266 }
5267 buf[len] = 0;
5268 if (cur != '?') {
5269 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5270 "ParsePI: PI %s never end ...\n", target);
5271 } else {
5272 if (inputid != ctxt->input->id) {
5273 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5274 "PI declaration doesn't start and stop in"
5275 " the same entity\n");
5276 }
5277 SKIP(2);
5278
5279 #ifdef LIBXML_CATALOG_ENABLED
5280 if (((state == XML_PARSER_MISC) ||
5281 (state == XML_PARSER_START)) &&
5282 (xmlStrEqual(target, XML_CATALOG_PI))) {
5283 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5284 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5285 (allow == XML_CATA_ALLOW_ALL))
5286 xmlParseCatalogPI(ctxt, buf);
5287 }
5288 #endif
5289
5290
5291 /*
5292 * SAX: PI detected.
5293 */
5294 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5295 (ctxt->sax->processingInstruction != NULL))
5296 ctxt->sax->processingInstruction(ctxt->userData,
5297 target, buf);
5298 }
5299 xmlFree(buf);
5300 } else {
5301 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5302 }
5303 if (ctxt->instate != XML_PARSER_EOF)
5304 ctxt->instate = state;
5305 }
5306 }
5307
5308 /**
5309 * xmlParseNotationDecl:
5310 * @ctxt: an XML parser context
5311 *
5312 * parse a notation declaration
5313 *
5314 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5315 *
5316 * Hence there is actually 3 choices:
5317 * 'PUBLIC' S PubidLiteral
5318 * 'PUBLIC' S PubidLiteral S SystemLiteral
5319 * and 'SYSTEM' S SystemLiteral
5320 *
5321 * See the NOTE on xmlParseExternalID().
5322 */
5323
5324 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5325 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5326 const xmlChar *name;
5327 xmlChar *Pubid;
5328 xmlChar *Systemid;
5329
5330 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5331 int inputid = ctxt->input->id;
5332 SHRINK;
5333 SKIP(10);
5334 if (SKIP_BLANKS == 0) {
5335 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5336 "Space required after '<!NOTATION'\n");
5337 return;
5338 }
5339
5340 name = xmlParseName(ctxt);
5341 if (name == NULL) {
5342 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5343 return;
5344 }
5345 if (xmlStrchr(name, ':') != NULL) {
5346 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5347 "colons are forbidden from notation names '%s'\n",
5348 name, NULL, NULL);
5349 }
5350 if (SKIP_BLANKS == 0) {
5351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after the NOTATION name'\n");
5353 return;
5354 }
5355
5356 /*
5357 * Parse the IDs.
5358 */
5359 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5360 SKIP_BLANKS;
5361
5362 if (RAW == '>') {
5363 if (inputid != ctxt->input->id) {
5364 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5365 "Notation declaration doesn't start and stop"
5366 " in the same entity\n");
5367 }
5368 NEXT;
5369 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5370 (ctxt->sax->notationDecl != NULL))
5371 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5372 } else {
5373 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5374 }
5375 if (Systemid != NULL) xmlFree(Systemid);
5376 if (Pubid != NULL) xmlFree(Pubid);
5377 }
5378 }
5379
5380 /**
5381 * xmlParseEntityDecl:
5382 * @ctxt: an XML parser context
5383 *
5384 * parse <!ENTITY declarations
5385 *
5386 * [70] EntityDecl ::= GEDecl | PEDecl
5387 *
5388 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5389 *
5390 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5391 *
5392 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5393 *
5394 * [74] PEDef ::= EntityValue | ExternalID
5395 *
5396 * [76] NDataDecl ::= S 'NDATA' S Name
5397 *
5398 * [ VC: Notation Declared ]
5399 * The Name must match the declared name of a notation.
5400 */
5401
5402 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5403 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5404 const xmlChar *name = NULL;
5405 xmlChar *value = NULL;
5406 xmlChar *URI = NULL, *literal = NULL;
5407 const xmlChar *ndata = NULL;
5408 int isParameter = 0;
5409 xmlChar *orig = NULL;
5410
5411 /* GROW; done in the caller */
5412 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5413 int inputid = ctxt->input->id;
5414 SHRINK;
5415 SKIP(8);
5416 if (SKIP_BLANKS == 0) {
5417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5418 "Space required after '<!ENTITY'\n");
5419 }
5420
5421 if (RAW == '%') {
5422 NEXT;
5423 if (SKIP_BLANKS == 0) {
5424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5425 "Space required after '%%'\n");
5426 }
5427 isParameter = 1;
5428 }
5429
5430 name = xmlParseName(ctxt);
5431 if (name == NULL) {
5432 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5433 "xmlParseEntityDecl: no name\n");
5434 return;
5435 }
5436 if (xmlStrchr(name, ':') != NULL) {
5437 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5438 "colons are forbidden from entities names '%s'\n",
5439 name, NULL, NULL);
5440 }
5441 if (SKIP_BLANKS == 0) {
5442 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5443 "Space required after the entity name\n");
5444 }
5445
5446 ctxt->instate = XML_PARSER_ENTITY_DECL;
5447 /*
5448 * handle the various case of definitions...
5449 */
5450 if (isParameter) {
5451 if ((RAW == '"') || (RAW == '\'')) {
5452 value = xmlParseEntityValue(ctxt, &orig);
5453 if (value) {
5454 if ((ctxt->sax != NULL) &&
5455 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5456 ctxt->sax->entityDecl(ctxt->userData, name,
5457 XML_INTERNAL_PARAMETER_ENTITY,
5458 NULL, NULL, value);
5459 }
5460 } else {
5461 URI = xmlParseExternalID(ctxt, &literal, 1);
5462 if ((URI == NULL) && (literal == NULL)) {
5463 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5464 }
5465 if (URI) {
5466 xmlURIPtr uri;
5467
5468 uri = xmlParseURI((const char *) URI);
5469 if (uri == NULL) {
5470 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5471 "Invalid URI: %s\n", URI);
5472 /*
5473 * This really ought to be a well formedness error
5474 * but the XML Core WG decided otherwise c.f. issue
5475 * E26 of the XML erratas.
5476 */
5477 } else {
5478 if (uri->fragment != NULL) {
5479 /*
5480 * Okay this is foolish to block those but not
5481 * invalid URIs.
5482 */
5483 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484 } else {
5485 if ((ctxt->sax != NULL) &&
5486 (!ctxt->disableSAX) &&
5487 (ctxt->sax->entityDecl != NULL))
5488 ctxt->sax->entityDecl(ctxt->userData, name,
5489 XML_EXTERNAL_PARAMETER_ENTITY,
5490 literal, URI, NULL);
5491 }
5492 xmlFreeURI(uri);
5493 }
5494 }
5495 }
5496 } else {
5497 if ((RAW == '"') || (RAW == '\'')) {
5498 value = xmlParseEntityValue(ctxt, &orig);
5499 if ((ctxt->sax != NULL) &&
5500 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5501 ctxt->sax->entityDecl(ctxt->userData, name,
5502 XML_INTERNAL_GENERAL_ENTITY,
5503 NULL, NULL, value);
5504 /*
5505 * For expat compatibility in SAX mode.
5506 */
5507 if ((ctxt->myDoc == NULL) ||
5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5509 if (ctxt->myDoc == NULL) {
5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5511 if (ctxt->myDoc == NULL) {
5512 xmlErrMemory(ctxt, "New Doc failed");
5513 return;
5514 }
5515 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516 }
5517 if (ctxt->myDoc->intSubset == NULL)
5518 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5519 BAD_CAST "fake", NULL, NULL);
5520
5521 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5522 NULL, NULL, value);
5523 }
5524 } else {
5525 URI = xmlParseExternalID(ctxt, &literal, 1);
5526 if ((URI == NULL) && (literal == NULL)) {
5527 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5528 }
5529 if (URI) {
5530 xmlURIPtr uri;
5531
5532 uri = xmlParseURI((const char *)URI);
5533 if (uri == NULL) {
5534 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5535 "Invalid URI: %s\n", URI);
5536 /*
5537 * This really ought to be a well formedness error
5538 * but the XML Core WG decided otherwise c.f. issue
5539 * E26 of the XML erratas.
5540 */
5541 } else {
5542 if (uri->fragment != NULL) {
5543 /*
5544 * Okay this is foolish to block those but not
5545 * invalid URIs.
5546 */
5547 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5548 }
5549 xmlFreeURI(uri);
5550 }
5551 }
5552 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5554 "Space required before 'NDATA'\n");
5555 }
5556 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5557 SKIP(5);
5558 if (SKIP_BLANKS == 0) {
5559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required after 'NDATA'\n");
5561 }
5562 ndata = xmlParseName(ctxt);
5563 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5564 (ctxt->sax->unparsedEntityDecl != NULL))
5565 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5566 literal, URI, ndata);
5567 } else {
5568 if ((ctxt->sax != NULL) &&
5569 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5570 ctxt->sax->entityDecl(ctxt->userData, name,
5571 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5572 literal, URI, NULL);
5573 /*
5574 * For expat compatibility in SAX mode.
5575 * assuming the entity replacement was asked for
5576 */
5577 if ((ctxt->replaceEntities != 0) &&
5578 ((ctxt->myDoc == NULL) ||
5579 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5580 if (ctxt->myDoc == NULL) {
5581 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5582 if (ctxt->myDoc == NULL) {
5583 xmlErrMemory(ctxt, "New Doc failed");
5584 return;
5585 }
5586 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5587 }
5588
5589 if (ctxt->myDoc->intSubset == NULL)
5590 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5591 BAD_CAST "fake", NULL, NULL);
5592 xmlSAX2EntityDecl(ctxt, name,
5593 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5594 literal, URI, NULL);
5595 }
5596 }
5597 }
5598 }
5599 if (ctxt->instate == XML_PARSER_EOF)
5600 goto done;
5601 SKIP_BLANKS;
5602 if (RAW != '>') {
5603 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5604 "xmlParseEntityDecl: entity %s not terminated\n", name);
5605 xmlHaltParser(ctxt);
5606 } else {
5607 if (inputid != ctxt->input->id) {
5608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5609 "Entity declaration doesn't start and stop in"
5610 " the same entity\n");
5611 }
5612 NEXT;
5613 }
5614 if (orig != NULL) {
5615 /*
5616 * Ugly mechanism to save the raw entity value.
5617 */
5618 xmlEntityPtr cur = NULL;
5619
5620 if (isParameter) {
5621 if ((ctxt->sax != NULL) &&
5622 (ctxt->sax->getParameterEntity != NULL))
5623 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5624 } else {
5625 if ((ctxt->sax != NULL) &&
5626 (ctxt->sax->getEntity != NULL))
5627 cur = ctxt->sax->getEntity(ctxt->userData, name);
5628 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5629 cur = xmlSAX2GetEntity(ctxt, name);
5630 }
5631 }
5632 if ((cur != NULL) && (cur->orig == NULL)) {
5633 cur->orig = orig;
5634 orig = NULL;
5635 }
5636 }
5637
5638 done:
5639 if (value != NULL) xmlFree(value);
5640 if (URI != NULL) xmlFree(URI);
5641 if (literal != NULL) xmlFree(literal);
5642 if (orig != NULL) xmlFree(orig);
5643 }
5644 }
5645
5646 /**
5647 * xmlParseDefaultDecl:
5648 * @ctxt: an XML parser context
5649 * @value: Receive a possible fixed default value for the attribute
5650 *
5651 * Parse an attribute default declaration
5652 *
5653 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5654 *
5655 * [ VC: Required Attribute ]
5656 * if the default declaration is the keyword #REQUIRED, then the
5657 * attribute must be specified for all elements of the type in the
5658 * attribute-list declaration.
5659 *
5660 * [ VC: Attribute Default Legal ]
5661 * The declared default value must meet the lexical constraints of
5662 * the declared attribute type c.f. xmlValidateAttributeDecl()
5663 *
5664 * [ VC: Fixed Attribute Default ]
5665 * if an attribute has a default value declared with the #FIXED
5666 * keyword, instances of that attribute must match the default value.
5667 *
5668 * [ WFC: No < in Attribute Values ]
5669 * handled in xmlParseAttValue()
5670 *
5671 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5672 * or XML_ATTRIBUTE_FIXED.
5673 */
5674
5675 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5676 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5677 int val;
5678 xmlChar *ret;
5679
5680 *value = NULL;
5681 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5682 SKIP(9);
5683 return(XML_ATTRIBUTE_REQUIRED);
5684 }
5685 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5686 SKIP(8);
5687 return(XML_ATTRIBUTE_IMPLIED);
5688 }
5689 val = XML_ATTRIBUTE_NONE;
5690 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5691 SKIP(6);
5692 val = XML_ATTRIBUTE_FIXED;
5693 if (SKIP_BLANKS == 0) {
5694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5695 "Space required after '#FIXED'\n");
5696 }
5697 }
5698 ret = xmlParseAttValue(ctxt);
5699 ctxt->instate = XML_PARSER_DTD;
5700 if (ret == NULL) {
5701 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5702 "Attribute default value declaration error\n");
5703 } else
5704 *value = ret;
5705 return(val);
5706 }
5707
5708 /**
5709 * xmlParseNotationType:
5710 * @ctxt: an XML parser context
5711 *
5712 * parse an Notation attribute type.
5713 *
5714 * Note: the leading 'NOTATION' S part has already being parsed...
5715 *
5716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5717 *
5718 * [ VC: Notation Attributes ]
5719 * Values of this type must match one of the notation names included
5720 * in the declaration; all notation names in the declaration must be declared.
5721 *
5722 * Returns: the notation attribute tree built while parsing
5723 */
5724
5725 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5726 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5727 const xmlChar *name;
5728 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5729
5730 if (RAW != '(') {
5731 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5732 return(NULL);
5733 }
5734 SHRINK;
5735 do {
5736 NEXT;
5737 SKIP_BLANKS;
5738 name = xmlParseName(ctxt);
5739 if (name == NULL) {
5740 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5741 "Name expected in NOTATION declaration\n");
5742 xmlFreeEnumeration(ret);
5743 return(NULL);
5744 }
5745 tmp = ret;
5746 while (tmp != NULL) {
5747 if (xmlStrEqual(name, tmp->name)) {
5748 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5749 "standalone: attribute notation value token %s duplicated\n",
5750 name, NULL);
5751 if (!xmlDictOwns(ctxt->dict, name))
5752 xmlFree((xmlChar *) name);
5753 break;
5754 }
5755 tmp = tmp->next;
5756 }
5757 if (tmp == NULL) {
5758 cur = xmlCreateEnumeration(name);
5759 if (cur == NULL) {
5760 xmlFreeEnumeration(ret);
5761 return(NULL);
5762 }
5763 if (last == NULL) ret = last = cur;
5764 else {
5765 last->next = cur;
5766 last = cur;
5767 }
5768 }
5769 SKIP_BLANKS;
5770 } while (RAW == '|');
5771 if (RAW != ')') {
5772 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5773 xmlFreeEnumeration(ret);
5774 return(NULL);
5775 }
5776 NEXT;
5777 return(ret);
5778 }
5779
5780 /**
5781 * xmlParseEnumerationType:
5782 * @ctxt: an XML parser context
5783 *
5784 * parse an Enumeration attribute type.
5785 *
5786 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5787 *
5788 * [ VC: Enumeration ]
5789 * Values of this type must match one of the Nmtoken tokens in
5790 * the declaration
5791 *
5792 * Returns: the enumeration attribute tree built while parsing
5793 */
5794
5795 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5796 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5797 xmlChar *name;
5798 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5799
5800 if (RAW != '(') {
5801 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5802 return(NULL);
5803 }
5804 SHRINK;
5805 do {
5806 NEXT;
5807 SKIP_BLANKS;
5808 name = xmlParseNmtoken(ctxt);
5809 if (name == NULL) {
5810 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5811 return(ret);
5812 }
5813 tmp = ret;
5814 while (tmp != NULL) {
5815 if (xmlStrEqual(name, tmp->name)) {
5816 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817 "standalone: attribute enumeration value token %s duplicated\n",
5818 name, NULL);
5819 if (!xmlDictOwns(ctxt->dict, name))
5820 xmlFree(name);
5821 break;
5822 }
5823 tmp = tmp->next;
5824 }
5825 if (tmp == NULL) {
5826 cur = xmlCreateEnumeration(name);
5827 if (!xmlDictOwns(ctxt->dict, name))
5828 xmlFree(name);
5829 if (cur == NULL) {
5830 xmlFreeEnumeration(ret);
5831 return(NULL);
5832 }
5833 if (last == NULL) ret = last = cur;
5834 else {
5835 last->next = cur;
5836 last = cur;
5837 }
5838 }
5839 SKIP_BLANKS;
5840 } while (RAW == '|');
5841 if (RAW != ')') {
5842 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5843 return(ret);
5844 }
5845 NEXT;
5846 return(ret);
5847 }
5848
5849 /**
5850 * xmlParseEnumeratedType:
5851 * @ctxt: an XML parser context
5852 * @tree: the enumeration tree built while parsing
5853 *
5854 * parse an Enumerated attribute type.
5855 *
5856 * [57] EnumeratedType ::= NotationType | Enumeration
5857 *
5858 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5859 *
5860 *
5861 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5862 */
5863
5864 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5865 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5866 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5867 SKIP(8);
5868 if (SKIP_BLANKS == 0) {
5869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5870 "Space required after 'NOTATION'\n");
5871 return(0);
5872 }
5873 *tree = xmlParseNotationType(ctxt);
5874 if (*tree == NULL) return(0);
5875 return(XML_ATTRIBUTE_NOTATION);
5876 }
5877 *tree = xmlParseEnumerationType(ctxt);
5878 if (*tree == NULL) return(0);
5879 return(XML_ATTRIBUTE_ENUMERATION);
5880 }
5881
5882 /**
5883 * xmlParseAttributeType:
5884 * @ctxt: an XML parser context
5885 * @tree: the enumeration tree built while parsing
5886 *
5887 * parse the Attribute list def for an element
5888 *
5889 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5890 *
5891 * [55] StringType ::= 'CDATA'
5892 *
5893 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5894 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5895 *
5896 * Validity constraints for attribute values syntax are checked in
5897 * xmlValidateAttributeValue()
5898 *
5899 * [ VC: ID ]
5900 * Values of type ID must match the Name production. A name must not
5901 * appear more than once in an XML document as a value of this type;
5902 * i.e., ID values must uniquely identify the elements which bear them.
5903 *
5904 * [ VC: One ID per Element Type ]
5905 * No element type may have more than one ID attribute specified.
5906 *
5907 * [ VC: ID Attribute Default ]
5908 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5909 *
5910 * [ VC: IDREF ]
5911 * Values of type IDREF must match the Name production, and values
5912 * of type IDREFS must match Names; each IDREF Name must match the value
5913 * of an ID attribute on some element in the XML document; i.e. IDREF
5914 * values must match the value of some ID attribute.
5915 *
5916 * [ VC: Entity Name ]
5917 * Values of type ENTITY must match the Name production, values
5918 * of type ENTITIES must match Names; each Entity Name must match the
5919 * name of an unparsed entity declared in the DTD.
5920 *
5921 * [ VC: Name Token ]
5922 * Values of type NMTOKEN must match the Nmtoken production; values
5923 * of type NMTOKENS must match Nmtokens.
5924 *
5925 * Returns the attribute type
5926 */
5927 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5928 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5929 SHRINK;
5930 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5931 SKIP(5);
5932 return(XML_ATTRIBUTE_CDATA);
5933 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5934 SKIP(6);
5935 return(XML_ATTRIBUTE_IDREFS);
5936 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5937 SKIP(5);
5938 return(XML_ATTRIBUTE_IDREF);
5939 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5940 SKIP(2);
5941 return(XML_ATTRIBUTE_ID);
5942 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5943 SKIP(6);
5944 return(XML_ATTRIBUTE_ENTITY);
5945 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5946 SKIP(8);
5947 return(XML_ATTRIBUTE_ENTITIES);
5948 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5949 SKIP(8);
5950 return(XML_ATTRIBUTE_NMTOKENS);
5951 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5952 SKIP(7);
5953 return(XML_ATTRIBUTE_NMTOKEN);
5954 }
5955 return(xmlParseEnumeratedType(ctxt, tree));
5956 }
5957
5958 /**
5959 * xmlParseAttributeListDecl:
5960 * @ctxt: an XML parser context
5961 *
5962 * : parse the Attribute list def for an element
5963 *
5964 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5965 *
5966 * [53] AttDef ::= S Name S AttType S DefaultDecl
5967 *
5968 */
5969 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5970 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5971 const xmlChar *elemName;
5972 const xmlChar *attrName;
5973 xmlEnumerationPtr tree;
5974
5975 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5976 int inputid = ctxt->input->id;
5977
5978 SKIP(9);
5979 if (SKIP_BLANKS == 0) {
5980 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5981 "Space required after '<!ATTLIST'\n");
5982 }
5983 elemName = xmlParseName(ctxt);
5984 if (elemName == NULL) {
5985 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5986 "ATTLIST: no name for Element\n");
5987 return;
5988 }
5989 SKIP_BLANKS;
5990 GROW;
5991 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5992 int type;
5993 int def;
5994 xmlChar *defaultValue = NULL;
5995
5996 GROW;
5997 tree = NULL;
5998 attrName = xmlParseName(ctxt);
5999 if (attrName == NULL) {
6000 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6001 "ATTLIST: no name for Attribute\n");
6002 break;
6003 }
6004 GROW;
6005 if (SKIP_BLANKS == 0) {
6006 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6007 "Space required after the attribute name\n");
6008 break;
6009 }
6010
6011 type = xmlParseAttributeType(ctxt, &tree);
6012 if (type <= 0) {
6013 break;
6014 }
6015
6016 GROW;
6017 if (SKIP_BLANKS == 0) {
6018 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6019 "Space required after the attribute type\n");
6020 if (tree != NULL)
6021 xmlFreeEnumeration(tree);
6022 break;
6023 }
6024
6025 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6026 if (def <= 0) {
6027 if (defaultValue != NULL)
6028 xmlFree(defaultValue);
6029 if (tree != NULL)
6030 xmlFreeEnumeration(tree);
6031 break;
6032 }
6033 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6034 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6035
6036 GROW;
6037 if (RAW != '>') {
6038 if (SKIP_BLANKS == 0) {
6039 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6040 "Space required after the attribute default value\n");
6041 if (defaultValue != NULL)
6042 xmlFree(defaultValue);
6043 if (tree != NULL)
6044 xmlFreeEnumeration(tree);
6045 break;
6046 }
6047 }
6048 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6049 (ctxt->sax->attributeDecl != NULL))
6050 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6051 type, def, defaultValue, tree);
6052 else if (tree != NULL)
6053 xmlFreeEnumeration(tree);
6054
6055 if ((ctxt->sax2) && (defaultValue != NULL) &&
6056 (def != XML_ATTRIBUTE_IMPLIED) &&
6057 (def != XML_ATTRIBUTE_REQUIRED)) {
6058 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6059 }
6060 if (ctxt->sax2) {
6061 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6062 }
6063 if (defaultValue != NULL)
6064 xmlFree(defaultValue);
6065 GROW;
6066 }
6067 if (RAW == '>') {
6068 if (inputid != ctxt->input->id) {
6069 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6070 "Attribute list declaration doesn't start and"
6071 " stop in the same entity\n");
6072 }
6073 NEXT;
6074 }
6075 }
6076 }
6077
6078 /**
6079 * xmlParseElementMixedContentDecl:
6080 * @ctxt: an XML parser context
6081 * @inputchk: the input used for the current entity, needed for boundary checks
6082 *
6083 * parse the declaration for a Mixed Element content
6084 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6085 *
6086 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6087 * '(' S? '#PCDATA' S? ')'
6088 *
6089 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6090 *
6091 * [ VC: No Duplicate Types ]
6092 * The same name must not appear more than once in a single
6093 * mixed-content declaration.
6094 *
6095 * returns: the list of the xmlElementContentPtr describing the element choices
6096 */
6097 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6098 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6099 xmlElementContentPtr ret = NULL, cur = NULL, n;
6100 const xmlChar *elem = NULL;
6101
6102 GROW;
6103 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6104 SKIP(7);
6105 SKIP_BLANKS;
6106 SHRINK;
6107 if (RAW == ')') {
6108 if (ctxt->input->id != inputchk) {
6109 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6110 "Element content declaration doesn't start and"
6111 " stop in the same entity\n");
6112 }
6113 NEXT;
6114 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6115 if (ret == NULL)
6116 return(NULL);
6117 if (RAW == '*') {
6118 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6119 NEXT;
6120 }
6121 return(ret);
6122 }
6123 if ((RAW == '(') || (RAW == '|')) {
6124 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6125 if (ret == NULL) return(NULL);
6126 }
6127 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6128 NEXT;
6129 if (elem == NULL) {
6130 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6131 if (ret == NULL) {
6132 xmlFreeDocElementContent(ctxt->myDoc, cur);
6133 return(NULL);
6134 }
6135 ret->c1 = cur;
6136 if (cur != NULL)
6137 cur->parent = ret;
6138 cur = ret;
6139 } else {
6140 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6141 if (n == NULL) {
6142 xmlFreeDocElementContent(ctxt->myDoc, ret);
6143 return(NULL);
6144 }
6145 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6146 if (n->c1 != NULL)
6147 n->c1->parent = n;
6148 cur->c2 = n;
6149 if (n != NULL)
6150 n->parent = cur;
6151 cur = n;
6152 }
6153 SKIP_BLANKS;
6154 elem = xmlParseName(ctxt);
6155 if (elem == NULL) {
6156 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6157 "xmlParseElementMixedContentDecl : Name expected\n");
6158 xmlFreeDocElementContent(ctxt->myDoc, ret);
6159 return(NULL);
6160 }
6161 SKIP_BLANKS;
6162 GROW;
6163 }
6164 if ((RAW == ')') && (NXT(1) == '*')) {
6165 if (elem != NULL) {
6166 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6167 XML_ELEMENT_CONTENT_ELEMENT);
6168 if (cur->c2 != NULL)
6169 cur->c2->parent = cur;
6170 }
6171 if (ret != NULL)
6172 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6173 if (ctxt->input->id != inputchk) {
6174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6175 "Element content declaration doesn't start and"
6176 " stop in the same entity\n");
6177 }
6178 SKIP(2);
6179 } else {
6180 xmlFreeDocElementContent(ctxt->myDoc, ret);
6181 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6182 return(NULL);
6183 }
6184
6185 } else {
6186 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6187 }
6188 return(ret);
6189 }
6190
6191 /**
6192 * xmlParseElementChildrenContentDeclPriv:
6193 * @ctxt: an XML parser context
6194 * @inputchk: the input used for the current entity, needed for boundary checks
6195 * @depth: the level of recursion
6196 *
6197 * parse the declaration for a Mixed Element content
6198 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6199 *
6200 *
6201 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6202 *
6203 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6204 *
6205 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6206 *
6207 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6208 *
6209 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6210 * TODO Parameter-entity replacement text must be properly nested
6211 * with parenthesized groups. That is to say, if either of the
6212 * opening or closing parentheses in a choice, seq, or Mixed
6213 * construct is contained in the replacement text for a parameter
6214 * entity, both must be contained in the same replacement text. For
6215 * interoperability, if a parameter-entity reference appears in a
6216 * choice, seq, or Mixed construct, its replacement text should not
6217 * be empty, and neither the first nor last non-blank character of
6218 * the replacement text should be a connector (| or ,).
6219 *
6220 * Returns the tree of xmlElementContentPtr describing the element
6221 * hierarchy.
6222 */
6223 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6224 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6225 int depth) {
6226 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6227 const xmlChar *elem;
6228 xmlChar type = 0;
6229
6230 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6231 (depth > 2048)) {
6232 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6233 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6234 depth);
6235 return(NULL);
6236 }
6237 SKIP_BLANKS;
6238 GROW;
6239 if (RAW == '(') {
6240 int inputid = ctxt->input->id;
6241
6242 /* Recurse on first child */
6243 NEXT;
6244 SKIP_BLANKS;
6245 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6246 depth + 1);
6247 if (cur == NULL)
6248 return(NULL);
6249 SKIP_BLANKS;
6250 GROW;
6251 } else {
6252 elem = xmlParseName(ctxt);
6253 if (elem == NULL) {
6254 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6255 return(NULL);
6256 }
6257 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6258 if (cur == NULL) {
6259 xmlErrMemory(ctxt, NULL);
6260 return(NULL);
6261 }
6262 GROW;
6263 if (RAW == '?') {
6264 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6265 NEXT;
6266 } else if (RAW == '*') {
6267 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6268 NEXT;
6269 } else if (RAW == '+') {
6270 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6271 NEXT;
6272 } else {
6273 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6274 }
6275 GROW;
6276 }
6277 SKIP_BLANKS;
6278 SHRINK;
6279 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6280 /*
6281 * Each loop we parse one separator and one element.
6282 */
6283 if (RAW == ',') {
6284 if (type == 0) type = CUR;
6285
6286 /*
6287 * Detect "Name | Name , Name" error
6288 */
6289 else if (type != CUR) {
6290 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6291 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6292 type);
6293 if ((last != NULL) && (last != ret))
6294 xmlFreeDocElementContent(ctxt->myDoc, last);
6295 if (ret != NULL)
6296 xmlFreeDocElementContent(ctxt->myDoc, ret);
6297 return(NULL);
6298 }
6299 NEXT;
6300
6301 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6302 if (op == NULL) {
6303 if ((last != NULL) && (last != ret))
6304 xmlFreeDocElementContent(ctxt->myDoc, last);
6305 xmlFreeDocElementContent(ctxt->myDoc, ret);
6306 return(NULL);
6307 }
6308 if (last == NULL) {
6309 op->c1 = ret;
6310 if (ret != NULL)
6311 ret->parent = op;
6312 ret = cur = op;
6313 } else {
6314 cur->c2 = op;
6315 if (op != NULL)
6316 op->parent = cur;
6317 op->c1 = last;
6318 if (last != NULL)
6319 last->parent = op;
6320 cur =op;
6321 last = NULL;
6322 }
6323 } else if (RAW == '|') {
6324 if (type == 0) type = CUR;
6325
6326 /*
6327 * Detect "Name , Name | Name" error
6328 */
6329 else if (type != CUR) {
6330 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6331 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6332 type);
6333 if ((last != NULL) && (last != ret))
6334 xmlFreeDocElementContent(ctxt->myDoc, last);
6335 if (ret != NULL)
6336 xmlFreeDocElementContent(ctxt->myDoc, ret);
6337 return(NULL);
6338 }
6339 NEXT;
6340
6341 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6342 if (op == NULL) {
6343 if ((last != NULL) && (last != ret))
6344 xmlFreeDocElementContent(ctxt->myDoc, last);
6345 if (ret != NULL)
6346 xmlFreeDocElementContent(ctxt->myDoc, ret);
6347 return(NULL);
6348 }
6349 if (last == NULL) {
6350 op->c1 = ret;
6351 if (ret != NULL)
6352 ret->parent = op;
6353 ret = cur = op;
6354 } else {
6355 cur->c2 = op;
6356 if (op != NULL)
6357 op->parent = cur;
6358 op->c1 = last;
6359 if (last != NULL)
6360 last->parent = op;
6361 cur =op;
6362 last = NULL;
6363 }
6364 } else {
6365 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6366 if ((last != NULL) && (last != ret))
6367 xmlFreeDocElementContent(ctxt->myDoc, last);
6368 if (ret != NULL)
6369 xmlFreeDocElementContent(ctxt->myDoc, ret);
6370 return(NULL);
6371 }
6372 GROW;
6373 SKIP_BLANKS;
6374 GROW;
6375 if (RAW == '(') {
6376 int inputid = ctxt->input->id;
6377 /* Recurse on second child */
6378 NEXT;
6379 SKIP_BLANKS;
6380 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6381 depth + 1);
6382 if (last == NULL) {
6383 if (ret != NULL)
6384 xmlFreeDocElementContent(ctxt->myDoc, ret);
6385 return(NULL);
6386 }
6387 SKIP_BLANKS;
6388 } else {
6389 elem = xmlParseName(ctxt);
6390 if (elem == NULL) {
6391 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6392 if (ret != NULL)
6393 xmlFreeDocElementContent(ctxt->myDoc, ret);
6394 return(NULL);
6395 }
6396 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6397 if (last == NULL) {
6398 if (ret != NULL)
6399 xmlFreeDocElementContent(ctxt->myDoc, ret);
6400 return(NULL);
6401 }
6402 if (RAW == '?') {
6403 last->ocur = XML_ELEMENT_CONTENT_OPT;
6404 NEXT;
6405 } else if (RAW == '*') {
6406 last->ocur = XML_ELEMENT_CONTENT_MULT;
6407 NEXT;
6408 } else if (RAW == '+') {
6409 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6410 NEXT;
6411 } else {
6412 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6413 }
6414 }
6415 SKIP_BLANKS;
6416 GROW;
6417 }
6418 if ((cur != NULL) && (last != NULL)) {
6419 cur->c2 = last;
6420 if (last != NULL)
6421 last->parent = cur;
6422 }
6423 if (ctxt->input->id != inputchk) {
6424 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6425 "Element content declaration doesn't start and stop in"
6426 " the same entity\n");
6427 }
6428 NEXT;
6429 if (RAW == '?') {
6430 if (ret != NULL) {
6431 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6432 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6433 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6434 else
6435 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6436 }
6437 NEXT;
6438 } else if (RAW == '*') {
6439 if (ret != NULL) {
6440 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6441 cur = ret;
6442 /*
6443 * Some normalization:
6444 * (a | b* | c?)* == (a | b | c)*
6445 */
6446 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6447 if ((cur->c1 != NULL) &&
6448 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6449 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6450 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6451 if ((cur->c2 != NULL) &&
6452 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6453 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6454 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6455 cur = cur->c2;
6456 }
6457 }
6458 NEXT;
6459 } else if (RAW == '+') {
6460 if (ret != NULL) {
6461 int found = 0;
6462
6463 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6464 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6465 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6466 else
6467 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6468 /*
6469 * Some normalization:
6470 * (a | b*)+ == (a | b)*
6471 * (a | b?)+ == (a | b)*
6472 */
6473 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6474 if ((cur->c1 != NULL) &&
6475 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6476 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6477 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6478 found = 1;
6479 }
6480 if ((cur->c2 != NULL) &&
6481 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 found = 1;
6485 }
6486 cur = cur->c2;
6487 }
6488 if (found)
6489 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490 }
6491 NEXT;
6492 }
6493 return(ret);
6494 }
6495
6496 /**
6497 * xmlParseElementChildrenContentDecl:
6498 * @ctxt: an XML parser context
6499 * @inputchk: the input used for the current entity, needed for boundary checks
6500 *
6501 * parse the declaration for a Mixed Element content
6502 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6503 *
6504 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6505 *
6506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6507 *
6508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6509 *
6510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6511 *
6512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6513 * TODO Parameter-entity replacement text must be properly nested
6514 * with parenthesized groups. That is to say, if either of the
6515 * opening or closing parentheses in a choice, seq, or Mixed
6516 * construct is contained in the replacement text for a parameter
6517 * entity, both must be contained in the same replacement text. For
6518 * interoperability, if a parameter-entity reference appears in a
6519 * choice, seq, or Mixed construct, its replacement text should not
6520 * be empty, and neither the first nor last non-blank character of
6521 * the replacement text should be a connector (| or ,).
6522 *
6523 * Returns the tree of xmlElementContentPtr describing the element
6524 * hierarchy.
6525 */
6526 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6527 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6528 /* stub left for API/ABI compat */
6529 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6530 }
6531
6532 /**
6533 * xmlParseElementContentDecl:
6534 * @ctxt: an XML parser context
6535 * @name: the name of the element being defined.
6536 * @result: the Element Content pointer will be stored here if any
6537 *
6538 * parse the declaration for an Element content either Mixed or Children,
6539 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6540 *
6541 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6542 *
6543 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6544 */
6545
6546 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6547 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6548 xmlElementContentPtr *result) {
6549
6550 xmlElementContentPtr tree = NULL;
6551 int inputid = ctxt->input->id;
6552 int res;
6553
6554 *result = NULL;
6555
6556 if (RAW != '(') {
6557 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6558 "xmlParseElementContentDecl : %s '(' expected\n", name);
6559 return(-1);
6560 }
6561 NEXT;
6562 GROW;
6563 if (ctxt->instate == XML_PARSER_EOF)
6564 return(-1);
6565 SKIP_BLANKS;
6566 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6567 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6568 res = XML_ELEMENT_TYPE_MIXED;
6569 } else {
6570 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6571 res = XML_ELEMENT_TYPE_ELEMENT;
6572 }
6573 SKIP_BLANKS;
6574 *result = tree;
6575 return(res);
6576 }
6577
6578 /**
6579 * xmlParseElementDecl:
6580 * @ctxt: an XML parser context
6581 *
6582 * parse an Element declaration.
6583 *
6584 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6585 *
6586 * [ VC: Unique Element Type Declaration ]
6587 * No element type may be declared more than once
6588 *
6589 * Returns the type of the element, or -1 in case of error
6590 */
6591 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6592 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6593 const xmlChar *name;
6594 int ret = -1;
6595 xmlElementContentPtr content = NULL;
6596
6597 /* GROW; done in the caller */
6598 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6599 int inputid = ctxt->input->id;
6600
6601 SKIP(9);
6602 if (SKIP_BLANKS == 0) {
6603 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6604 "Space required after 'ELEMENT'\n");
6605 return(-1);
6606 }
6607 name = xmlParseName(ctxt);
6608 if (name == NULL) {
6609 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6610 "xmlParseElementDecl: no name for Element\n");
6611 return(-1);
6612 }
6613 if (SKIP_BLANKS == 0) {
6614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6615 "Space required after the element name\n");
6616 }
6617 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6618 SKIP(5);
6619 /*
6620 * Element must always be empty.
6621 */
6622 ret = XML_ELEMENT_TYPE_EMPTY;
6623 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6624 (NXT(2) == 'Y')) {
6625 SKIP(3);
6626 /*
6627 * Element is a generic container.
6628 */
6629 ret = XML_ELEMENT_TYPE_ANY;
6630 } else if (RAW == '(') {
6631 ret = xmlParseElementContentDecl(ctxt, name, &content);
6632 } else {
6633 /*
6634 * [ WFC: PEs in Internal Subset ] error handling.
6635 */
6636 if ((RAW == '%') && (ctxt->external == 0) &&
6637 (ctxt->inputNr == 1)) {
6638 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6639 "PEReference: forbidden within markup decl in internal subset\n");
6640 } else {
6641 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6643 }
6644 return(-1);
6645 }
6646
6647 SKIP_BLANKS;
6648
6649 if (RAW != '>') {
6650 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6651 if (content != NULL) {
6652 xmlFreeDocElementContent(ctxt->myDoc, content);
6653 }
6654 } else {
6655 if (inputid != ctxt->input->id) {
6656 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657 "Element declaration doesn't start and stop in"
6658 " the same entity\n");
6659 }
6660
6661 NEXT;
6662 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6663 (ctxt->sax->elementDecl != NULL)) {
6664 if (content != NULL)
6665 content->parent = NULL;
6666 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6667 content);
6668 if ((content != NULL) && (content->parent == NULL)) {
6669 /*
6670 * this is a trick: if xmlAddElementDecl is called,
6671 * instead of copying the full tree it is plugged directly
6672 * if called from the parser. Avoid duplicating the
6673 * interfaces or change the API/ABI
6674 */
6675 xmlFreeDocElementContent(ctxt->myDoc, content);
6676 }
6677 } else if (content != NULL) {
6678 xmlFreeDocElementContent(ctxt->myDoc, content);
6679 }
6680 }
6681 }
6682 return(ret);
6683 }
6684
6685 /**
6686 * xmlParseConditionalSections
6687 * @ctxt: an XML parser context
6688 *
6689 * [61] conditionalSect ::= includeSect | ignoreSect
6690 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6691 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6692 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6693 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6694 */
6695
6696 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6697 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6698 int *inputIds = NULL;
6699 size_t inputIdsSize = 0;
6700 size_t depth = 0;
6701
6702 while (ctxt->instate != XML_PARSER_EOF) {
6703 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6704 int id = ctxt->input->id;
6705
6706 SKIP(3);
6707 SKIP_BLANKS;
6708
6709 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6710 SKIP(7);
6711 SKIP_BLANKS;
6712 if (RAW != '[') {
6713 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6714 xmlHaltParser(ctxt);
6715 goto error;
6716 }
6717 if (ctxt->input->id != id) {
6718 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6719 "All markup of the conditional section is"
6720 " not in the same entity\n");
6721 }
6722 NEXT;
6723
6724 if (inputIdsSize <= depth) {
6725 int *tmp;
6726
6727 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6728 tmp = (int *) xmlRealloc(inputIds,
6729 inputIdsSize * sizeof(int));
6730 if (tmp == NULL) {
6731 xmlErrMemory(ctxt, NULL);
6732 goto error;
6733 }
6734 inputIds = tmp;
6735 }
6736 inputIds[depth] = id;
6737 depth++;
6738 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6739 int state;
6740 xmlParserInputState instate;
6741 size_t ignoreDepth = 0;
6742
6743 SKIP(6);
6744 SKIP_BLANKS;
6745 if (RAW != '[') {
6746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6747 xmlHaltParser(ctxt);
6748 goto error;
6749 }
6750 if (ctxt->input->id != id) {
6751 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752 "All markup of the conditional section is"
6753 " not in the same entity\n");
6754 }
6755 NEXT;
6756
6757 /*
6758 * Parse up to the end of the conditional section but disable
6759 * SAX event generating DTD building in the meantime
6760 */
6761 state = ctxt->disableSAX;
6762 instate = ctxt->instate;
6763 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6764 ctxt->instate = XML_PARSER_IGNORE;
6765
6766 while (RAW != 0) {
6767 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6768 SKIP(3);
6769 ignoreDepth++;
6770 /* Check for integer overflow */
6771 if (ignoreDepth == 0) {
6772 xmlErrMemory(ctxt, NULL);
6773 goto error;
6774 }
6775 } else if ((RAW == ']') && (NXT(1) == ']') &&
6776 (NXT(2) == '>')) {
6777 if (ignoreDepth == 0)
6778 break;
6779 SKIP(3);
6780 ignoreDepth--;
6781 } else {
6782 NEXT;
6783 }
6784 }
6785
6786 ctxt->disableSAX = state;
6787 ctxt->instate = instate;
6788
6789 if (RAW == 0) {
6790 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6791 goto error;
6792 }
6793 if (ctxt->input->id != id) {
6794 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6795 "All markup of the conditional section is"
6796 " not in the same entity\n");
6797 }
6798 SKIP(3);
6799 } else {
6800 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6801 xmlHaltParser(ctxt);
6802 goto error;
6803 }
6804 } else if ((depth > 0) &&
6805 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6806 depth--;
6807 if (ctxt->input->id != inputIds[depth]) {
6808 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6809 "All markup of the conditional section is not"
6810 " in the same entity\n");
6811 }
6812 SKIP(3);
6813 } else {
6814 const xmlChar *check = CUR_PTR;
6815 unsigned int cons = ctxt->input->consumed;
6816
6817 xmlParseMarkupDecl(ctxt);
6818
6819 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6820 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6821 xmlHaltParser(ctxt);
6822 goto error;
6823 }
6824 }
6825
6826 if (depth == 0)
6827 break;
6828
6829 SKIP_BLANKS;
6830 GROW;
6831 }
6832
6833 error:
6834 xmlFree(inputIds);
6835 }
6836
6837 /**
6838 * xmlParseMarkupDecl:
6839 * @ctxt: an XML parser context
6840 *
6841 * parse Markup declarations
6842 *
6843 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6844 * NotationDecl | PI | Comment
6845 *
6846 * [ VC: Proper Declaration/PE Nesting ]
6847 * Parameter-entity replacement text must be properly nested with
6848 * markup declarations. That is to say, if either the first character
6849 * or the last character of a markup declaration (markupdecl above) is
6850 * contained in the replacement text for a parameter-entity reference,
6851 * both must be contained in the same replacement text.
6852 *
6853 * [ WFC: PEs in Internal Subset ]
6854 * In the internal DTD subset, parameter-entity references can occur
6855 * only where markup declarations can occur, not within markup declarations.
6856 * (This does not apply to references that occur in external parameter
6857 * entities or to the external subset.)
6858 */
6859 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6860 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6861 GROW;
6862 if (CUR == '<') {
6863 if (NXT(1) == '!') {
6864 switch (NXT(2)) {
6865 case 'E':
6866 if (NXT(3) == 'L')
6867 xmlParseElementDecl(ctxt);
6868 else if (NXT(3) == 'N')
6869 xmlParseEntityDecl(ctxt);
6870 break;
6871 case 'A':
6872 xmlParseAttributeListDecl(ctxt);
6873 break;
6874 case 'N':
6875 xmlParseNotationDecl(ctxt);
6876 break;
6877 case '-':
6878 xmlParseComment(ctxt);
6879 break;
6880 default:
6881 /* there is an error but it will be detected later */
6882 break;
6883 }
6884 } else if (NXT(1) == '?') {
6885 xmlParsePI(ctxt);
6886 }
6887 }
6888
6889 /*
6890 * detect requirement to exit there and act accordingly
6891 * and avoid having instate overridden later on
6892 */
6893 if (ctxt->instate == XML_PARSER_EOF)
6894 return;
6895
6896 ctxt->instate = XML_PARSER_DTD;
6897 }
6898
6899 /**
6900 * xmlParseTextDecl:
6901 * @ctxt: an XML parser context
6902 *
6903 * parse an XML declaration header for external entities
6904 *
6905 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6906 */
6907
6908 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6909 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6910 xmlChar *version;
6911 const xmlChar *encoding;
6912 int oldstate;
6913
6914 /*
6915 * We know that '<?xml' is here.
6916 */
6917 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6918 SKIP(5);
6919 } else {
6920 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6921 return;
6922 }
6923
6924 /* Avoid expansion of parameter entities when skipping blanks. */
6925 oldstate = ctxt->instate;
6926 ctxt->instate = XML_PARSER_START;
6927
6928 if (SKIP_BLANKS == 0) {
6929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6930 "Space needed after '<?xml'\n");
6931 }
6932
6933 /*
6934 * We may have the VersionInfo here.
6935 */
6936 version = xmlParseVersionInfo(ctxt);
6937 if (version == NULL)
6938 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6939 else {
6940 if (SKIP_BLANKS == 0) {
6941 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6942 "Space needed here\n");
6943 }
6944 }
6945 ctxt->input->version = version;
6946
6947 /*
6948 * We must have the encoding declaration
6949 */
6950 encoding = xmlParseEncodingDecl(ctxt);
6951 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6952 /*
6953 * The XML REC instructs us to stop parsing right here
6954 */
6955 ctxt->instate = oldstate;
6956 return;
6957 }
6958 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6959 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6960 "Missing encoding in text declaration\n");
6961 }
6962
6963 SKIP_BLANKS;
6964 if ((RAW == '?') && (NXT(1) == '>')) {
6965 SKIP(2);
6966 } else if (RAW == '>') {
6967 /* Deprecated old WD ... */
6968 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6969 NEXT;
6970 } else {
6971 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6972 MOVETO_ENDTAG(CUR_PTR);
6973 NEXT;
6974 }
6975
6976 ctxt->instate = oldstate;
6977 }
6978
6979 /**
6980 * xmlParseExternalSubset:
6981 * @ctxt: an XML parser context
6982 * @ExternalID: the external identifier
6983 * @SystemID: the system identifier (or URL)
6984 *
6985 * parse Markup declarations from an external subset
6986 *
6987 * [30] extSubset ::= textDecl? extSubsetDecl
6988 *
6989 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6990 */
6991 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6992 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6993 const xmlChar *SystemID) {
6994 xmlDetectSAX2(ctxt);
6995 GROW;
6996
6997 if ((ctxt->encoding == NULL) &&
6998 (ctxt->input->end - ctxt->input->cur >= 4)) {
6999 xmlChar start[4];
7000 xmlCharEncoding enc;
7001
7002 start[0] = RAW;
7003 start[1] = NXT(1);
7004 start[2] = NXT(2);
7005 start[3] = NXT(3);
7006 enc = xmlDetectCharEncoding(start, 4);
7007 if (enc != XML_CHAR_ENCODING_NONE)
7008 xmlSwitchEncoding(ctxt, enc);
7009 }
7010
7011 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7012 xmlParseTextDecl(ctxt);
7013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7014 /*
7015 * The XML REC instructs us to stop parsing right here
7016 */
7017 xmlHaltParser(ctxt);
7018 return;
7019 }
7020 }
7021 if (ctxt->myDoc == NULL) {
7022 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7023 if (ctxt->myDoc == NULL) {
7024 xmlErrMemory(ctxt, "New Doc failed");
7025 return;
7026 }
7027 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7028 }
7029 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7030 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7031
7032 ctxt->instate = XML_PARSER_DTD;
7033 ctxt->external = 1;
7034 SKIP_BLANKS;
7035 while (((RAW == '<') && (NXT(1) == '?')) ||
7036 ((RAW == '<') && (NXT(1) == '!')) ||
7037 (RAW == '%')) {
7038 const xmlChar *check = CUR_PTR;
7039 unsigned int cons = ctxt->input->consumed;
7040
7041 GROW;
7042 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7043 xmlParseConditionalSections(ctxt);
7044 } else
7045 xmlParseMarkupDecl(ctxt);
7046 SKIP_BLANKS;
7047
7048 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7049 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7050 break;
7051 }
7052 }
7053
7054 if (RAW != 0) {
7055 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7056 }
7057
7058 }
7059
7060 /**
7061 * xmlParseReference:
7062 * @ctxt: an XML parser context
7063 *
7064 * parse and handle entity references in content, depending on the SAX
7065 * interface, this may end-up in a call to character() if this is a
7066 * CharRef, a predefined entity, if there is no reference() callback.
7067 * or if the parser was asked to switch to that mode.
7068 *
7069 * [67] Reference ::= EntityRef | CharRef
7070 */
7071 void
xmlParseReference(xmlParserCtxtPtr ctxt)7072 xmlParseReference(xmlParserCtxtPtr ctxt) {
7073 xmlEntityPtr ent;
7074 xmlChar *val;
7075 int was_checked;
7076 xmlNodePtr list = NULL;
7077 xmlParserErrors ret = XML_ERR_OK;
7078
7079
7080 if (RAW != '&')
7081 return;
7082
7083 /*
7084 * Simple case of a CharRef
7085 */
7086 if (NXT(1) == '#') {
7087 int i = 0;
7088 xmlChar out[16];
7089 int hex = NXT(2);
7090 int value = xmlParseCharRef(ctxt);
7091
7092 if (value == 0)
7093 return;
7094 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7095 /*
7096 * So we are using non-UTF-8 buffers
7097 * Check that the char fit on 8bits, if not
7098 * generate a CharRef.
7099 */
7100 if (value <= 0xFF) {
7101 out[0] = value;
7102 out[1] = 0;
7103 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7104 (!ctxt->disableSAX))
7105 ctxt->sax->characters(ctxt->userData, out, 1);
7106 } else {
7107 if ((hex == 'x') || (hex == 'X'))
7108 snprintf((char *)out, sizeof(out), "#x%X", value);
7109 else
7110 snprintf((char *)out, sizeof(out), "#%d", value);
7111 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7112 (!ctxt->disableSAX))
7113 ctxt->sax->reference(ctxt->userData, out);
7114 }
7115 } else {
7116 /*
7117 * Just encode the value in UTF-8
7118 */
7119 COPY_BUF(0 ,out, i, value);
7120 out[i] = 0;
7121 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7122 (!ctxt->disableSAX))
7123 ctxt->sax->characters(ctxt->userData, out, i);
7124 }
7125 return;
7126 }
7127
7128 /*
7129 * We are seeing an entity reference
7130 */
7131 ent = xmlParseEntityRef(ctxt);
7132 if (ent == NULL) return;
7133 if (!ctxt->wellFormed)
7134 return;
7135 was_checked = ent->checked;
7136
7137 /* special case of predefined entities */
7138 if ((ent->name == NULL) ||
7139 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7140 val = ent->content;
7141 if (val == NULL) return;
7142 /*
7143 * inline the entity.
7144 */
7145 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7146 (!ctxt->disableSAX))
7147 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7148 return;
7149 }
7150
7151 /*
7152 * The first reference to the entity trigger a parsing phase
7153 * where the ent->children is filled with the result from
7154 * the parsing.
7155 * Note: external parsed entities will not be loaded, it is not
7156 * required for a non-validating parser, unless the parsing option
7157 * of validating, or substituting entities were given. Doing so is
7158 * far more secure as the parser will only process data coming from
7159 * the document entity by default.
7160 */
7161 if (((ent->checked == 0) ||
7162 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7163 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7164 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7165 unsigned long oldnbent = ctxt->nbentities, diff;
7166
7167 /*
7168 * This is a bit hackish but this seems the best
7169 * way to make sure both SAX and DOM entity support
7170 * behaves okay.
7171 */
7172 void *user_data;
7173 if (ctxt->userData == ctxt)
7174 user_data = NULL;
7175 else
7176 user_data = ctxt->userData;
7177
7178 /*
7179 * Check that this entity is well formed
7180 * 4.3.2: An internal general parsed entity is well-formed
7181 * if its replacement text matches the production labeled
7182 * content.
7183 */
7184 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7185 ctxt->depth++;
7186 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7187 user_data, &list);
7188 ctxt->depth--;
7189
7190 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7191 ctxt->depth++;
7192 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7193 user_data, ctxt->depth, ent->URI,
7194 ent->ExternalID, &list);
7195 ctxt->depth--;
7196 } else {
7197 ret = XML_ERR_ENTITY_PE_INTERNAL;
7198 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7199 "invalid entity type found\n", NULL);
7200 }
7201
7202 /*
7203 * Store the number of entities needing parsing for this entity
7204 * content and do checkings
7205 */
7206 diff = ctxt->nbentities - oldnbent + 1;
7207 if (diff > INT_MAX / 2)
7208 diff = INT_MAX / 2;
7209 ent->checked = diff * 2;
7210 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7211 ent->checked |= 1;
7212 if (ret == XML_ERR_ENTITY_LOOP) {
7213 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7214 xmlHaltParser(ctxt);
7215 xmlFreeNodeList(list);
7216 return;
7217 }
7218 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7219 xmlFreeNodeList(list);
7220 return;
7221 }
7222
7223 if ((ret == XML_ERR_OK) && (list != NULL)) {
7224 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7225 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7226 (ent->children == NULL)) {
7227 ent->children = list;
7228 /*
7229 * Prune it directly in the generated document
7230 * except for single text nodes.
7231 */
7232 if ((ctxt->replaceEntities == 0) ||
7233 (ctxt->parseMode == XML_PARSE_READER) ||
7234 ((list->type == XML_TEXT_NODE) &&
7235 (list->next == NULL))) {
7236 ent->owner = 1;
7237 while (list != NULL) {
7238 list->parent = (xmlNodePtr) ent;
7239 xmlSetTreeDoc(list, ent->doc);
7240 if (list->next == NULL)
7241 ent->last = list;
7242 list = list->next;
7243 }
7244 list = NULL;
7245 } else {
7246 ent->owner = 0;
7247 while (list != NULL) {
7248 list->parent = (xmlNodePtr) ctxt->node;
7249 list->doc = ctxt->myDoc;
7250 if (list->next == NULL)
7251 ent->last = list;
7252 list = list->next;
7253 }
7254 list = ent->children;
7255 #ifdef LIBXML_LEGACY_ENABLED
7256 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7257 xmlAddEntityReference(ent, list, NULL);
7258 #endif /* LIBXML_LEGACY_ENABLED */
7259 }
7260 } else {
7261 xmlFreeNodeList(list);
7262 list = NULL;
7263 }
7264 } else if ((ret != XML_ERR_OK) &&
7265 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7266 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7267 "Entity '%s' failed to parse\n", ent->name);
7268 if (ent->content != NULL)
7269 ent->content[0] = 0;
7270 xmlParserEntityCheck(ctxt, 0, ent, 0);
7271 } else if (list != NULL) {
7272 xmlFreeNodeList(list);
7273 list = NULL;
7274 }
7275 if (ent->checked == 0)
7276 ent->checked = 2;
7277
7278 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7279 was_checked = 0;
7280 } else if (ent->checked != 1) {
7281 ctxt->nbentities += ent->checked / 2;
7282 }
7283
7284 /*
7285 * Now that the entity content has been gathered
7286 * provide it to the application, this can take different forms based
7287 * on the parsing modes.
7288 */
7289 if (ent->children == NULL) {
7290 /*
7291 * Probably running in SAX mode and the callbacks don't
7292 * build the entity content. So unless we already went
7293 * though parsing for first checking go though the entity
7294 * content to generate callbacks associated to the entity
7295 */
7296 if (was_checked != 0) {
7297 void *user_data;
7298 /*
7299 * This is a bit hackish but this seems the best
7300 * way to make sure both SAX and DOM entity support
7301 * behaves okay.
7302 */
7303 if (ctxt->userData == ctxt)
7304 user_data = NULL;
7305 else
7306 user_data = ctxt->userData;
7307
7308 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7309 ctxt->depth++;
7310 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7311 ent->content, user_data, NULL);
7312 ctxt->depth--;
7313 } else if (ent->etype ==
7314 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7315 ctxt->depth++;
7316 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7317 ctxt->sax, user_data, ctxt->depth,
7318 ent->URI, ent->ExternalID, NULL);
7319 ctxt->depth--;
7320 } else {
7321 ret = XML_ERR_ENTITY_PE_INTERNAL;
7322 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7323 "invalid entity type found\n", NULL);
7324 }
7325 if (ret == XML_ERR_ENTITY_LOOP) {
7326 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7327 return;
7328 }
7329 }
7330 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7331 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7332 /*
7333 * Entity reference callback comes second, it's somewhat
7334 * superfluous but a compatibility to historical behaviour
7335 */
7336 ctxt->sax->reference(ctxt->userData, ent->name);
7337 }
7338 return;
7339 }
7340
7341 /*
7342 * If we didn't get any children for the entity being built
7343 */
7344 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7345 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7346 /*
7347 * Create a node.
7348 */
7349 ctxt->sax->reference(ctxt->userData, ent->name);
7350 return;
7351 }
7352
7353 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7354 /*
7355 * There is a problem on the handling of _private for entities
7356 * (bug 155816): Should we copy the content of the field from
7357 * the entity (possibly overwriting some value set by the user
7358 * when a copy is created), should we leave it alone, or should
7359 * we try to take care of different situations? The problem
7360 * is exacerbated by the usage of this field by the xmlReader.
7361 * To fix this bug, we look at _private on the created node
7362 * and, if it's NULL, we copy in whatever was in the entity.
7363 * If it's not NULL we leave it alone. This is somewhat of a
7364 * hack - maybe we should have further tests to determine
7365 * what to do.
7366 */
7367 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7368 /*
7369 * Seems we are generating the DOM content, do
7370 * a simple tree copy for all references except the first
7371 * In the first occurrence list contains the replacement.
7372 */
7373 if (((list == NULL) && (ent->owner == 0)) ||
7374 (ctxt->parseMode == XML_PARSE_READER)) {
7375 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7376
7377 /*
7378 * We are copying here, make sure there is no abuse
7379 */
7380 ctxt->sizeentcopy += ent->length + 5;
7381 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7382 return;
7383
7384 /*
7385 * when operating on a reader, the entities definitions
7386 * are always owning the entities subtree.
7387 if (ctxt->parseMode == XML_PARSE_READER)
7388 ent->owner = 1;
7389 */
7390
7391 cur = ent->children;
7392 while (cur != NULL) {
7393 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7394 if (nw != NULL) {
7395 if (nw->_private == NULL)
7396 nw->_private = cur->_private;
7397 if (firstChild == NULL){
7398 firstChild = nw;
7399 }
7400 nw = xmlAddChild(ctxt->node, nw);
7401 }
7402 if (cur == ent->last) {
7403 /*
7404 * needed to detect some strange empty
7405 * node cases in the reader tests
7406 */
7407 if ((ctxt->parseMode == XML_PARSE_READER) &&
7408 (nw != NULL) &&
7409 (nw->type == XML_ELEMENT_NODE) &&
7410 (nw->children == NULL))
7411 nw->extra = 1;
7412
7413 break;
7414 }
7415 cur = cur->next;
7416 }
7417 #ifdef LIBXML_LEGACY_ENABLED
7418 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7419 xmlAddEntityReference(ent, firstChild, nw);
7420 #endif /* LIBXML_LEGACY_ENABLED */
7421 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7422 xmlNodePtr nw = NULL, cur, next, last,
7423 firstChild = NULL;
7424
7425 /*
7426 * We are copying here, make sure there is no abuse
7427 */
7428 ctxt->sizeentcopy += ent->length + 5;
7429 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7430 return;
7431
7432 /*
7433 * Copy the entity child list and make it the new
7434 * entity child list. The goal is to make sure any
7435 * ID or REF referenced will be the one from the
7436 * document content and not the entity copy.
7437 */
7438 cur = ent->children;
7439 ent->children = NULL;
7440 last = ent->last;
7441 ent->last = NULL;
7442 while (cur != NULL) {
7443 next = cur->next;
7444 cur->next = NULL;
7445 cur->parent = NULL;
7446 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7447 if (nw != NULL) {
7448 if (nw->_private == NULL)
7449 nw->_private = cur->_private;
7450 if (firstChild == NULL){
7451 firstChild = cur;
7452 }
7453 xmlAddChild((xmlNodePtr) ent, nw);
7454 xmlAddChild(ctxt->node, cur);
7455 }
7456 if (cur == last)
7457 break;
7458 cur = next;
7459 }
7460 if (ent->owner == 0)
7461 ent->owner = 1;
7462 #ifdef LIBXML_LEGACY_ENABLED
7463 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7464 xmlAddEntityReference(ent, firstChild, nw);
7465 #endif /* LIBXML_LEGACY_ENABLED */
7466 } else {
7467 const xmlChar *nbktext;
7468
7469 /*
7470 * the name change is to avoid coalescing of the
7471 * node with a possible previous text one which
7472 * would make ent->children a dangling pointer
7473 */
7474 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7475 -1);
7476 if (ent->children->type == XML_TEXT_NODE)
7477 ent->children->name = nbktext;
7478 if ((ent->last != ent->children) &&
7479 (ent->last->type == XML_TEXT_NODE))
7480 ent->last->name = nbktext;
7481 xmlAddChildList(ctxt->node, ent->children);
7482 }
7483
7484 /*
7485 * This is to avoid a nasty side effect, see
7486 * characters() in SAX.c
7487 */
7488 ctxt->nodemem = 0;
7489 ctxt->nodelen = 0;
7490 return;
7491 }
7492 }
7493 }
7494
7495 /**
7496 * xmlParseEntityRef:
7497 * @ctxt: an XML parser context
7498 *
7499 * parse ENTITY references declarations
7500 *
7501 * [68] EntityRef ::= '&' Name ';'
7502 *
7503 * [ WFC: Entity Declared ]
7504 * In a document without any DTD, a document with only an internal DTD
7505 * subset which contains no parameter entity references, or a document
7506 * with "standalone='yes'", the Name given in the entity reference
7507 * must match that in an entity declaration, except that well-formed
7508 * documents need not declare any of the following entities: amp, lt,
7509 * gt, apos, quot. The declaration of a parameter entity must precede
7510 * any reference to it. Similarly, the declaration of a general entity
7511 * must precede any reference to it which appears in a default value in an
7512 * attribute-list declaration. Note that if entities are declared in the
7513 * external subset or in external parameter entities, a non-validating
7514 * processor is not obligated to read and process their declarations;
7515 * for such documents, the rule that an entity must be declared is a
7516 * well-formedness constraint only if standalone='yes'.
7517 *
7518 * [ WFC: Parsed Entity ]
7519 * An entity reference must not contain the name of an unparsed entity
7520 *
7521 * Returns the xmlEntityPtr if found, or NULL otherwise.
7522 */
7523 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7524 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7525 const xmlChar *name;
7526 xmlEntityPtr ent = NULL;
7527
7528 GROW;
7529 if (ctxt->instate == XML_PARSER_EOF)
7530 return(NULL);
7531
7532 if (RAW != '&')
7533 return(NULL);
7534 NEXT;
7535 name = xmlParseName(ctxt);
7536 if (name == NULL) {
7537 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7538 "xmlParseEntityRef: no name\n");
7539 return(NULL);
7540 }
7541 if (RAW != ';') {
7542 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7543 return(NULL);
7544 }
7545 NEXT;
7546
7547 /*
7548 * Predefined entities override any extra definition
7549 */
7550 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7551 ent = xmlGetPredefinedEntity(name);
7552 if (ent != NULL)
7553 return(ent);
7554 }
7555
7556 /*
7557 * Increase the number of entity references parsed
7558 */
7559 ctxt->nbentities++;
7560
7561 /*
7562 * Ask first SAX for entity resolution, otherwise try the
7563 * entities which may have stored in the parser context.
7564 */
7565 if (ctxt->sax != NULL) {
7566 if (ctxt->sax->getEntity != NULL)
7567 ent = ctxt->sax->getEntity(ctxt->userData, name);
7568 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7569 (ctxt->options & XML_PARSE_OLDSAX))
7570 ent = xmlGetPredefinedEntity(name);
7571 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7572 (ctxt->userData==ctxt)) {
7573 ent = xmlSAX2GetEntity(ctxt, name);
7574 }
7575 }
7576 if (ctxt->instate == XML_PARSER_EOF)
7577 return(NULL);
7578 /*
7579 * [ WFC: Entity Declared ]
7580 * In a document without any DTD, a document with only an
7581 * internal DTD subset which contains no parameter entity
7582 * references, or a document with "standalone='yes'", the
7583 * Name given in the entity reference must match that in an
7584 * entity declaration, except that well-formed documents
7585 * need not declare any of the following entities: amp, lt,
7586 * gt, apos, quot.
7587 * The declaration of a parameter entity must precede any
7588 * reference to it.
7589 * Similarly, the declaration of a general entity must
7590 * precede any reference to it which appears in a default
7591 * value in an attribute-list declaration. Note that if
7592 * entities are declared in the external subset or in
7593 * external parameter entities, a non-validating processor
7594 * is not obligated to read and process their declarations;
7595 * for such documents, the rule that an entity must be
7596 * declared is a well-formedness constraint only if
7597 * standalone='yes'.
7598 */
7599 if (ent == NULL) {
7600 if ((ctxt->standalone == 1) ||
7601 ((ctxt->hasExternalSubset == 0) &&
7602 (ctxt->hasPErefs == 0))) {
7603 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604 "Entity '%s' not defined\n", name);
7605 } else {
7606 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607 "Entity '%s' not defined\n", name);
7608 if ((ctxt->inSubset == 0) &&
7609 (ctxt->sax != NULL) &&
7610 (ctxt->sax->reference != NULL)) {
7611 ctxt->sax->reference(ctxt->userData, name);
7612 }
7613 }
7614 xmlParserEntityCheck(ctxt, 0, ent, 0);
7615 ctxt->valid = 0;
7616 }
7617
7618 /*
7619 * [ WFC: Parsed Entity ]
7620 * An entity reference must not contain the name of an
7621 * unparsed entity
7622 */
7623 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625 "Entity reference to unparsed entity %s\n", name);
7626 }
7627
7628 /*
7629 * [ WFC: No External Entity References ]
7630 * Attribute values cannot contain direct or indirect
7631 * entity references to external entities.
7632 */
7633 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7634 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7635 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7636 "Attribute references external entity '%s'\n", name);
7637 }
7638 /*
7639 * [ WFC: No < in Attribute Values ]
7640 * The replacement text of any entity referred to directly or
7641 * indirectly in an attribute value (other than "<") must
7642 * not contain a <.
7643 */
7644 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7645 (ent != NULL) &&
7646 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7647 if (((ent->checked & 1) || (ent->checked == 0)) &&
7648 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7649 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7650 "'<' in entity '%s' is not allowed in attributes values\n", name);
7651 }
7652 }
7653
7654 /*
7655 * Internal check, no parameter entities here ...
7656 */
7657 else {
7658 switch (ent->etype) {
7659 case XML_INTERNAL_PARAMETER_ENTITY:
7660 case XML_EXTERNAL_PARAMETER_ENTITY:
7661 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7662 "Attempt to reference the parameter entity '%s'\n",
7663 name);
7664 break;
7665 default:
7666 break;
7667 }
7668 }
7669
7670 /*
7671 * [ WFC: No Recursion ]
7672 * A parsed entity must not contain a recursive reference
7673 * to itself, either directly or indirectly.
7674 * Done somewhere else
7675 */
7676 return(ent);
7677 }
7678
7679 /**
7680 * xmlParseStringEntityRef:
7681 * @ctxt: an XML parser context
7682 * @str: a pointer to an index in the string
7683 *
7684 * parse ENTITY references declarations, but this version parses it from
7685 * a string value.
7686 *
7687 * [68] EntityRef ::= '&' Name ';'
7688 *
7689 * [ WFC: Entity Declared ]
7690 * In a document without any DTD, a document with only an internal DTD
7691 * subset which contains no parameter entity references, or a document
7692 * with "standalone='yes'", the Name given in the entity reference
7693 * must match that in an entity declaration, except that well-formed
7694 * documents need not declare any of the following entities: amp, lt,
7695 * gt, apos, quot. The declaration of a parameter entity must precede
7696 * any reference to it. Similarly, the declaration of a general entity
7697 * must precede any reference to it which appears in a default value in an
7698 * attribute-list declaration. Note that if entities are declared in the
7699 * external subset or in external parameter entities, a non-validating
7700 * processor is not obligated to read and process their declarations;
7701 * for such documents, the rule that an entity must be declared is a
7702 * well-formedness constraint only if standalone='yes'.
7703 *
7704 * [ WFC: Parsed Entity ]
7705 * An entity reference must not contain the name of an unparsed entity
7706 *
7707 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7708 * is updated to the current location in the string.
7709 */
7710 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7711 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7712 xmlChar *name;
7713 const xmlChar *ptr;
7714 xmlChar cur;
7715 xmlEntityPtr ent = NULL;
7716
7717 if ((str == NULL) || (*str == NULL))
7718 return(NULL);
7719 ptr = *str;
7720 cur = *ptr;
7721 if (cur != '&')
7722 return(NULL);
7723
7724 ptr++;
7725 name = xmlParseStringName(ctxt, &ptr);
7726 if (name == NULL) {
7727 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7728 "xmlParseStringEntityRef: no name\n");
7729 *str = ptr;
7730 return(NULL);
7731 }
7732 if (*ptr != ';') {
7733 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7734 xmlFree(name);
7735 *str = ptr;
7736 return(NULL);
7737 }
7738 ptr++;
7739
7740
7741 /*
7742 * Predefined entities override any extra definition
7743 */
7744 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7745 ent = xmlGetPredefinedEntity(name);
7746 if (ent != NULL) {
7747 xmlFree(name);
7748 *str = ptr;
7749 return(ent);
7750 }
7751 }
7752
7753 /*
7754 * Increase the number of entity references parsed
7755 */
7756 ctxt->nbentities++;
7757
7758 /*
7759 * Ask first SAX for entity resolution, otherwise try the
7760 * entities which may have stored in the parser context.
7761 */
7762 if (ctxt->sax != NULL) {
7763 if (ctxt->sax->getEntity != NULL)
7764 ent = ctxt->sax->getEntity(ctxt->userData, name);
7765 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7766 ent = xmlGetPredefinedEntity(name);
7767 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7768 ent = xmlSAX2GetEntity(ctxt, name);
7769 }
7770 }
7771 if (ctxt->instate == XML_PARSER_EOF) {
7772 xmlFree(name);
7773 return(NULL);
7774 }
7775
7776 /*
7777 * [ WFC: Entity Declared ]
7778 * In a document without any DTD, a document with only an
7779 * internal DTD subset which contains no parameter entity
7780 * references, or a document with "standalone='yes'", the
7781 * Name given in the entity reference must match that in an
7782 * entity declaration, except that well-formed documents
7783 * need not declare any of the following entities: amp, lt,
7784 * gt, apos, quot.
7785 * The declaration of a parameter entity must precede any
7786 * reference to it.
7787 * Similarly, the declaration of a general entity must
7788 * precede any reference to it which appears in a default
7789 * value in an attribute-list declaration. Note that if
7790 * entities are declared in the external subset or in
7791 * external parameter entities, a non-validating processor
7792 * is not obligated to read and process their declarations;
7793 * for such documents, the rule that an entity must be
7794 * declared is a well-formedness constraint only if
7795 * standalone='yes'.
7796 */
7797 if (ent == NULL) {
7798 if ((ctxt->standalone == 1) ||
7799 ((ctxt->hasExternalSubset == 0) &&
7800 (ctxt->hasPErefs == 0))) {
7801 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7802 "Entity '%s' not defined\n", name);
7803 } else {
7804 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7805 "Entity '%s' not defined\n",
7806 name);
7807 }
7808 xmlParserEntityCheck(ctxt, 0, ent, 0);
7809 /* TODO ? check regressions ctxt->valid = 0; */
7810 }
7811
7812 /*
7813 * [ WFC: Parsed Entity ]
7814 * An entity reference must not contain the name of an
7815 * unparsed entity
7816 */
7817 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7818 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7819 "Entity reference to unparsed entity %s\n", name);
7820 }
7821
7822 /*
7823 * [ WFC: No External Entity References ]
7824 * Attribute values cannot contain direct or indirect
7825 * entity references to external entities.
7826 */
7827 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7828 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7829 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7830 "Attribute references external entity '%s'\n", name);
7831 }
7832 /*
7833 * [ WFC: No < in Attribute Values ]
7834 * The replacement text of any entity referred to directly or
7835 * indirectly in an attribute value (other than "<") must
7836 * not contain a <.
7837 */
7838 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7839 (ent != NULL) && (ent->content != NULL) &&
7840 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7841 (xmlStrchr(ent->content, '<'))) {
7842 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7843 "'<' in entity '%s' is not allowed in attributes values\n",
7844 name);
7845 }
7846
7847 /*
7848 * Internal check, no parameter entities here ...
7849 */
7850 else {
7851 switch (ent->etype) {
7852 case XML_INTERNAL_PARAMETER_ENTITY:
7853 case XML_EXTERNAL_PARAMETER_ENTITY:
7854 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7855 "Attempt to reference the parameter entity '%s'\n",
7856 name);
7857 break;
7858 default:
7859 break;
7860 }
7861 }
7862
7863 /*
7864 * [ WFC: No Recursion ]
7865 * A parsed entity must not contain a recursive reference
7866 * to itself, either directly or indirectly.
7867 * Done somewhere else
7868 */
7869
7870 xmlFree(name);
7871 *str = ptr;
7872 return(ent);
7873 }
7874
7875 /**
7876 * xmlParsePEReference:
7877 * @ctxt: an XML parser context
7878 *
7879 * parse PEReference declarations
7880 * The entity content is handled directly by pushing it's content as
7881 * a new input stream.
7882 *
7883 * [69] PEReference ::= '%' Name ';'
7884 *
7885 * [ WFC: No Recursion ]
7886 * A parsed entity must not contain a recursive
7887 * reference to itself, either directly or indirectly.
7888 *
7889 * [ WFC: Entity Declared ]
7890 * In a document without any DTD, a document with only an internal DTD
7891 * subset which contains no parameter entity references, or a document
7892 * with "standalone='yes'", ... ... The declaration of a parameter
7893 * entity must precede any reference to it...
7894 *
7895 * [ VC: Entity Declared ]
7896 * In a document with an external subset or external parameter entities
7897 * with "standalone='no'", ... ... The declaration of a parameter entity
7898 * must precede any reference to it...
7899 *
7900 * [ WFC: In DTD ]
7901 * Parameter-entity references may only appear in the DTD.
7902 * NOTE: misleading but this is handled.
7903 */
7904 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7905 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7906 {
7907 const xmlChar *name;
7908 xmlEntityPtr entity = NULL;
7909 xmlParserInputPtr input;
7910
7911 if (RAW != '%')
7912 return;
7913 NEXT;
7914 name = xmlParseName(ctxt);
7915 if (name == NULL) {
7916 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7917 return;
7918 }
7919 if (xmlParserDebugEntities)
7920 xmlGenericError(xmlGenericErrorContext,
7921 "PEReference: %s\n", name);
7922 if (RAW != ';') {
7923 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7924 return;
7925 }
7926
7927 NEXT;
7928
7929 /*
7930 * Increase the number of entity references parsed
7931 */
7932 ctxt->nbentities++;
7933
7934 /*
7935 * Request the entity from SAX
7936 */
7937 if ((ctxt->sax != NULL) &&
7938 (ctxt->sax->getParameterEntity != NULL))
7939 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7940 if (ctxt->instate == XML_PARSER_EOF)
7941 return;
7942 if (entity == NULL) {
7943 /*
7944 * [ WFC: Entity Declared ]
7945 * In a document without any DTD, a document with only an
7946 * internal DTD subset which contains no parameter entity
7947 * references, or a document with "standalone='yes'", ...
7948 * ... The declaration of a parameter entity must precede
7949 * any reference to it...
7950 */
7951 if ((ctxt->standalone == 1) ||
7952 ((ctxt->hasExternalSubset == 0) &&
7953 (ctxt->hasPErefs == 0))) {
7954 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7955 "PEReference: %%%s; not found\n",
7956 name);
7957 } else {
7958 /*
7959 * [ VC: Entity Declared ]
7960 * In a document with an external subset or external
7961 * parameter entities with "standalone='no'", ...
7962 * ... The declaration of a parameter entity must
7963 * precede any reference to it...
7964 */
7965 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7966 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7967 "PEReference: %%%s; not found\n",
7968 name, NULL);
7969 } else
7970 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7971 "PEReference: %%%s; not found\n",
7972 name, NULL);
7973 ctxt->valid = 0;
7974 }
7975 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7976 } else {
7977 /*
7978 * Internal checking in case the entity quest barfed
7979 */
7980 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7981 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7982 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983 "Internal: %%%s; is not a parameter entity\n",
7984 name, NULL);
7985 } else {
7986 xmlChar start[4];
7987 xmlCharEncoding enc;
7988
7989 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7990 return;
7991
7992 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7993 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7994 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7995 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7996 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7997 (ctxt->replaceEntities == 0) &&
7998 (ctxt->validate == 0))
7999 return;
8000
8001 input = xmlNewEntityInputStream(ctxt, entity);
8002 if (xmlPushInput(ctxt, input) < 0) {
8003 xmlFreeInputStream(input);
8004 return;
8005 }
8006
8007 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8008 /*
8009 * Get the 4 first bytes and decode the charset
8010 * if enc != XML_CHAR_ENCODING_NONE
8011 * plug some encoding conversion routines.
8012 * Note that, since we may have some non-UTF8
8013 * encoding (like UTF16, bug 135229), the 'length'
8014 * is not known, but we can calculate based upon
8015 * the amount of data in the buffer.
8016 */
8017 GROW
8018 if (ctxt->instate == XML_PARSER_EOF)
8019 return;
8020 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8021 start[0] = RAW;
8022 start[1] = NXT(1);
8023 start[2] = NXT(2);
8024 start[3] = NXT(3);
8025 enc = xmlDetectCharEncoding(start, 4);
8026 if (enc != XML_CHAR_ENCODING_NONE) {
8027 xmlSwitchEncoding(ctxt, enc);
8028 }
8029 }
8030
8031 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8032 (IS_BLANK_CH(NXT(5)))) {
8033 xmlParseTextDecl(ctxt);
8034 }
8035 }
8036 }
8037 }
8038 ctxt->hasPErefs = 1;
8039 }
8040
8041 /**
8042 * xmlLoadEntityContent:
8043 * @ctxt: an XML parser context
8044 * @entity: an unloaded system entity
8045 *
8046 * Load the original content of the given system entity from the
8047 * ExternalID/SystemID given. This is to be used for Included in Literal
8048 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8049 *
8050 * Returns 0 in case of success and -1 in case of failure
8051 */
8052 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8053 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8054 xmlParserInputPtr input;
8055 xmlBufferPtr buf;
8056 int l, c;
8057 int count = 0;
8058
8059 if ((ctxt == NULL) || (entity == NULL) ||
8060 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8061 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8062 (entity->content != NULL)) {
8063 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8064 "xmlLoadEntityContent parameter error");
8065 return(-1);
8066 }
8067
8068 if (xmlParserDebugEntities)
8069 xmlGenericError(xmlGenericErrorContext,
8070 "Reading %s entity content input\n", entity->name);
8071
8072 buf = xmlBufferCreate();
8073 if (buf == NULL) {
8074 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8075 "xmlLoadEntityContent parameter error");
8076 return(-1);
8077 }
8078
8079 input = xmlNewEntityInputStream(ctxt, entity);
8080 if (input == NULL) {
8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 "xmlLoadEntityContent input error");
8083 xmlBufferFree(buf);
8084 return(-1);
8085 }
8086
8087 /*
8088 * Push the entity as the current input, read char by char
8089 * saving to the buffer until the end of the entity or an error
8090 */
8091 if (xmlPushInput(ctxt, input) < 0) {
8092 xmlBufferFree(buf);
8093 return(-1);
8094 }
8095
8096 GROW;
8097 c = CUR_CHAR(l);
8098 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8099 (IS_CHAR(c))) {
8100 xmlBufferAdd(buf, ctxt->input->cur, l);
8101 if (count++ > XML_PARSER_CHUNK_SIZE) {
8102 count = 0;
8103 GROW;
8104 if (ctxt->instate == XML_PARSER_EOF) {
8105 xmlBufferFree(buf);
8106 return(-1);
8107 }
8108 }
8109 NEXTL(l);
8110 c = CUR_CHAR(l);
8111 if (c == 0) {
8112 count = 0;
8113 GROW;
8114 if (ctxt->instate == XML_PARSER_EOF) {
8115 xmlBufferFree(buf);
8116 return(-1);
8117 }
8118 c = CUR_CHAR(l);
8119 }
8120 }
8121
8122 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8123 xmlPopInput(ctxt);
8124 } else if (!IS_CHAR(c)) {
8125 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8126 "xmlLoadEntityContent: invalid char value %d\n",
8127 c);
8128 xmlBufferFree(buf);
8129 return(-1);
8130 }
8131 entity->content = buf->content;
8132 buf->content = NULL;
8133 xmlBufferFree(buf);
8134
8135 return(0);
8136 }
8137
8138 /**
8139 * xmlParseStringPEReference:
8140 * @ctxt: an XML parser context
8141 * @str: a pointer to an index in the string
8142 *
8143 * parse PEReference declarations
8144 *
8145 * [69] PEReference ::= '%' Name ';'
8146 *
8147 * [ WFC: No Recursion ]
8148 * A parsed entity must not contain a recursive
8149 * reference to itself, either directly or indirectly.
8150 *
8151 * [ WFC: Entity Declared ]
8152 * In a document without any DTD, a document with only an internal DTD
8153 * subset which contains no parameter entity references, or a document
8154 * with "standalone='yes'", ... ... The declaration of a parameter
8155 * entity must precede any reference to it...
8156 *
8157 * [ VC: Entity Declared ]
8158 * In a document with an external subset or external parameter entities
8159 * with "standalone='no'", ... ... The declaration of a parameter entity
8160 * must precede any reference to it...
8161 *
8162 * [ WFC: In DTD ]
8163 * Parameter-entity references may only appear in the DTD.
8164 * NOTE: misleading but this is handled.
8165 *
8166 * Returns the string of the entity content.
8167 * str is updated to the current value of the index
8168 */
8169 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8170 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8171 const xmlChar *ptr;
8172 xmlChar cur;
8173 xmlChar *name;
8174 xmlEntityPtr entity = NULL;
8175
8176 if ((str == NULL) || (*str == NULL)) return(NULL);
8177 ptr = *str;
8178 cur = *ptr;
8179 if (cur != '%')
8180 return(NULL);
8181 ptr++;
8182 name = xmlParseStringName(ctxt, &ptr);
8183 if (name == NULL) {
8184 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8185 "xmlParseStringPEReference: no name\n");
8186 *str = ptr;
8187 return(NULL);
8188 }
8189 cur = *ptr;
8190 if (cur != ';') {
8191 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8192 xmlFree(name);
8193 *str = ptr;
8194 return(NULL);
8195 }
8196 ptr++;
8197
8198 /*
8199 * Increase the number of entity references parsed
8200 */
8201 ctxt->nbentities++;
8202
8203 /*
8204 * Request the entity from SAX
8205 */
8206 if ((ctxt->sax != NULL) &&
8207 (ctxt->sax->getParameterEntity != NULL))
8208 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8209 if (ctxt->instate == XML_PARSER_EOF) {
8210 xmlFree(name);
8211 *str = ptr;
8212 return(NULL);
8213 }
8214 if (entity == NULL) {
8215 /*
8216 * [ WFC: Entity Declared ]
8217 * In a document without any DTD, a document with only an
8218 * internal DTD subset which contains no parameter entity
8219 * references, or a document with "standalone='yes'", ...
8220 * ... The declaration of a parameter entity must precede
8221 * any reference to it...
8222 */
8223 if ((ctxt->standalone == 1) ||
8224 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8225 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8226 "PEReference: %%%s; not found\n", name);
8227 } else {
8228 /*
8229 * [ VC: Entity Declared ]
8230 * In a document with an external subset or external
8231 * parameter entities with "standalone='no'", ...
8232 * ... The declaration of a parameter entity must
8233 * precede any reference to it...
8234 */
8235 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8236 "PEReference: %%%s; not found\n",
8237 name, NULL);
8238 ctxt->valid = 0;
8239 }
8240 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8241 } else {
8242 /*
8243 * Internal checking in case the entity quest barfed
8244 */
8245 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8246 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8247 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8248 "%%%s; is not a parameter entity\n",
8249 name, NULL);
8250 }
8251 }
8252 ctxt->hasPErefs = 1;
8253 xmlFree(name);
8254 *str = ptr;
8255 return(entity);
8256 }
8257
8258 /**
8259 * xmlParseDocTypeDecl:
8260 * @ctxt: an XML parser context
8261 *
8262 * parse a DOCTYPE declaration
8263 *
8264 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8265 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266 *
8267 * [ VC: Root Element Type ]
8268 * The Name in the document type declaration must match the element
8269 * type of the root element.
8270 */
8271
8272 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8273 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8274 const xmlChar *name = NULL;
8275 xmlChar *ExternalID = NULL;
8276 xmlChar *URI = NULL;
8277
8278 /*
8279 * We know that '<!DOCTYPE' has been detected.
8280 */
8281 SKIP(9);
8282
8283 SKIP_BLANKS;
8284
8285 /*
8286 * Parse the DOCTYPE name.
8287 */
8288 name = xmlParseName(ctxt);
8289 if (name == NULL) {
8290 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8291 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8292 }
8293 ctxt->intSubName = name;
8294
8295 SKIP_BLANKS;
8296
8297 /*
8298 * Check for SystemID and ExternalID
8299 */
8300 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8301
8302 if ((URI != NULL) || (ExternalID != NULL)) {
8303 ctxt->hasExternalSubset = 1;
8304 }
8305 ctxt->extSubURI = URI;
8306 ctxt->extSubSystem = ExternalID;
8307
8308 SKIP_BLANKS;
8309
8310 /*
8311 * Create and update the internal subset.
8312 */
8313 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8314 (!ctxt->disableSAX))
8315 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8316 if (ctxt->instate == XML_PARSER_EOF)
8317 return;
8318
8319 /*
8320 * Is there any internal subset declarations ?
8321 * they are handled separately in xmlParseInternalSubset()
8322 */
8323 if (RAW == '[')
8324 return;
8325
8326 /*
8327 * We should be at the end of the DOCTYPE declaration.
8328 */
8329 if (RAW != '>') {
8330 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8331 }
8332 NEXT;
8333 }
8334
8335 /**
8336 * xmlParseInternalSubset:
8337 * @ctxt: an XML parser context
8338 *
8339 * parse the internal subset declaration
8340 *
8341 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8342 */
8343
8344 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8345 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8346 /*
8347 * Is there any DTD definition ?
8348 */
8349 if (RAW == '[') {
8350 int baseInputNr = ctxt->inputNr;
8351 ctxt->instate = XML_PARSER_DTD;
8352 NEXT;
8353 /*
8354 * Parse the succession of Markup declarations and
8355 * PEReferences.
8356 * Subsequence (markupdecl | PEReference | S)*
8357 */
8358 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8359 (ctxt->instate != XML_PARSER_EOF)) {
8360 const xmlChar *check = CUR_PTR;
8361 unsigned int cons = ctxt->input->consumed;
8362
8363 SKIP_BLANKS;
8364 xmlParseMarkupDecl(ctxt);
8365 xmlParsePEReference(ctxt);
8366
8367 /*
8368 * Conditional sections are allowed from external entities included
8369 * by PE References in the internal subset.
8370 */
8371 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8372 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8373 xmlParseConditionalSections(ctxt);
8374 }
8375
8376 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8377 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8378 "xmlParseInternalSubset: error detected in Markup declaration\n");
8379 if (ctxt->inputNr > baseInputNr)
8380 xmlPopInput(ctxt);
8381 else
8382 break;
8383 }
8384 }
8385 if (RAW == ']') {
8386 NEXT;
8387 SKIP_BLANKS;
8388 }
8389 }
8390
8391 /*
8392 * We should be at the end of the DOCTYPE declaration.
8393 */
8394 if (RAW != '>') {
8395 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8396 return;
8397 }
8398 NEXT;
8399 }
8400
8401 #ifdef LIBXML_SAX1_ENABLED
8402 /**
8403 * xmlParseAttribute:
8404 * @ctxt: an XML parser context
8405 * @value: a xmlChar ** used to store the value of the attribute
8406 *
8407 * parse an attribute
8408 *
8409 * [41] Attribute ::= Name Eq AttValue
8410 *
8411 * [ WFC: No External Entity References ]
8412 * Attribute values cannot contain direct or indirect entity references
8413 * to external entities.
8414 *
8415 * [ WFC: No < in Attribute Values ]
8416 * The replacement text of any entity referred to directly or indirectly in
8417 * an attribute value (other than "<") must not contain a <.
8418 *
8419 * [ VC: Attribute Value Type ]
8420 * The attribute must have been declared; the value must be of the type
8421 * declared for it.
8422 *
8423 * [25] Eq ::= S? '=' S?
8424 *
8425 * With namespace:
8426 *
8427 * [NS 11] Attribute ::= QName Eq AttValue
8428 *
8429 * Also the case QName == xmlns:??? is handled independently as a namespace
8430 * definition.
8431 *
8432 * Returns the attribute name, and the value in *value.
8433 */
8434
8435 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8436 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8437 const xmlChar *name;
8438 xmlChar *val;
8439
8440 *value = NULL;
8441 GROW;
8442 name = xmlParseName(ctxt);
8443 if (name == NULL) {
8444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8445 "error parsing attribute name\n");
8446 return(NULL);
8447 }
8448
8449 /*
8450 * read the value
8451 */
8452 SKIP_BLANKS;
8453 if (RAW == '=') {
8454 NEXT;
8455 SKIP_BLANKS;
8456 val = xmlParseAttValue(ctxt);
8457 ctxt->instate = XML_PARSER_CONTENT;
8458 } else {
8459 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8460 "Specification mandates value for attribute %s\n", name);
8461 return(NULL);
8462 }
8463
8464 /*
8465 * Check that xml:lang conforms to the specification
8466 * No more registered as an error, just generate a warning now
8467 * since this was deprecated in XML second edition
8468 */
8469 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8470 if (!xmlCheckLanguageID(val)) {
8471 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8472 "Malformed value for xml:lang : %s\n",
8473 val, NULL);
8474 }
8475 }
8476
8477 /*
8478 * Check that xml:space conforms to the specification
8479 */
8480 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8481 if (xmlStrEqual(val, BAD_CAST "default"))
8482 *(ctxt->space) = 0;
8483 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8484 *(ctxt->space) = 1;
8485 else {
8486 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8487 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8488 val, NULL);
8489 }
8490 }
8491
8492 *value = val;
8493 return(name);
8494 }
8495
8496 /**
8497 * xmlParseStartTag:
8498 * @ctxt: an XML parser context
8499 *
8500 * parse a start of tag either for rule element or
8501 * EmptyElement. In both case we don't parse the tag closing chars.
8502 *
8503 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8504 *
8505 * [ WFC: Unique Att Spec ]
8506 * No attribute name may appear more than once in the same start-tag or
8507 * empty-element tag.
8508 *
8509 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8510 *
8511 * [ WFC: Unique Att Spec ]
8512 * No attribute name may appear more than once in the same start-tag or
8513 * empty-element tag.
8514 *
8515 * With namespace:
8516 *
8517 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8518 *
8519 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8520 *
8521 * Returns the element name parsed
8522 */
8523
8524 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8525 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8526 const xmlChar *name;
8527 const xmlChar *attname;
8528 xmlChar *attvalue;
8529 const xmlChar **atts = ctxt->atts;
8530 int nbatts = 0;
8531 int maxatts = ctxt->maxatts;
8532 int i;
8533
8534 if (RAW != '<') return(NULL);
8535 NEXT1;
8536
8537 name = xmlParseName(ctxt);
8538 if (name == NULL) {
8539 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8540 "xmlParseStartTag: invalid element name\n");
8541 return(NULL);
8542 }
8543
8544 /*
8545 * Now parse the attributes, it ends up with the ending
8546 *
8547 * (S Attribute)* S?
8548 */
8549 SKIP_BLANKS;
8550 GROW;
8551
8552 while (((RAW != '>') &&
8553 ((RAW != '/') || (NXT(1) != '>')) &&
8554 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8555 const xmlChar *q = CUR_PTR;
8556 unsigned int cons = ctxt->input->consumed;
8557
8558 attname = xmlParseAttribute(ctxt, &attvalue);
8559 if ((attname != NULL) && (attvalue != NULL)) {
8560 /*
8561 * [ WFC: Unique Att Spec ]
8562 * No attribute name may appear more than once in the same
8563 * start-tag or empty-element tag.
8564 */
8565 for (i = 0; i < nbatts;i += 2) {
8566 if (xmlStrEqual(atts[i], attname)) {
8567 xmlErrAttributeDup(ctxt, NULL, attname);
8568 xmlFree(attvalue);
8569 goto failed;
8570 }
8571 }
8572 /*
8573 * Add the pair to atts
8574 */
8575 if (atts == NULL) {
8576 maxatts = 22; /* allow for 10 attrs by default */
8577 atts = (const xmlChar **)
8578 xmlMalloc(maxatts * sizeof(xmlChar *));
8579 if (atts == NULL) {
8580 xmlErrMemory(ctxt, NULL);
8581 if (attvalue != NULL)
8582 xmlFree(attvalue);
8583 goto failed;
8584 }
8585 ctxt->atts = atts;
8586 ctxt->maxatts = maxatts;
8587 } else if (nbatts + 4 > maxatts) {
8588 const xmlChar **n;
8589
8590 maxatts *= 2;
8591 n = (const xmlChar **) xmlRealloc((void *) atts,
8592 maxatts * sizeof(const xmlChar *));
8593 if (n == NULL) {
8594 xmlErrMemory(ctxt, NULL);
8595 if (attvalue != NULL)
8596 xmlFree(attvalue);
8597 goto failed;
8598 }
8599 atts = n;
8600 ctxt->atts = atts;
8601 ctxt->maxatts = maxatts;
8602 }
8603 atts[nbatts++] = attname;
8604 atts[nbatts++] = attvalue;
8605 atts[nbatts] = NULL;
8606 atts[nbatts + 1] = NULL;
8607 } else {
8608 if (attvalue != NULL)
8609 xmlFree(attvalue);
8610 }
8611
8612 failed:
8613
8614 GROW
8615 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8616 break;
8617 if (SKIP_BLANKS == 0) {
8618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8619 "attributes construct error\n");
8620 }
8621 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8622 (attname == NULL) && (attvalue == NULL)) {
8623 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8624 "xmlParseStartTag: problem parsing attributes\n");
8625 break;
8626 }
8627 SHRINK;
8628 GROW;
8629 }
8630
8631 /*
8632 * SAX: Start of Element !
8633 */
8634 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8635 (!ctxt->disableSAX)) {
8636 if (nbatts > 0)
8637 ctxt->sax->startElement(ctxt->userData, name, atts);
8638 else
8639 ctxt->sax->startElement(ctxt->userData, name, NULL);
8640 }
8641
8642 if (atts != NULL) {
8643 /* Free only the content strings */
8644 for (i = 1;i < nbatts;i+=2)
8645 if (atts[i] != NULL)
8646 xmlFree((xmlChar *) atts[i]);
8647 }
8648 return(name);
8649 }
8650
8651 /**
8652 * xmlParseEndTag1:
8653 * @ctxt: an XML parser context
8654 * @line: line of the start tag
8655 * @nsNr: number of namespaces on the start tag
8656 *
8657 * parse an end of tag
8658 *
8659 * [42] ETag ::= '</' Name S? '>'
8660 *
8661 * With namespace
8662 *
8663 * [NS 9] ETag ::= '</' QName S? '>'
8664 */
8665
8666 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8667 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8668 const xmlChar *name;
8669
8670 GROW;
8671 if ((RAW != '<') || (NXT(1) != '/')) {
8672 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8673 "xmlParseEndTag: '</' not found\n");
8674 return;
8675 }
8676 SKIP(2);
8677
8678 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8679
8680 /*
8681 * We should definitely be at the ending "S? '>'" part
8682 */
8683 GROW;
8684 SKIP_BLANKS;
8685 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8686 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8687 } else
8688 NEXT1;
8689
8690 /*
8691 * [ WFC: Element Type Match ]
8692 * The Name in an element's end-tag must match the element type in the
8693 * start-tag.
8694 *
8695 */
8696 if (name != (xmlChar*)1) {
8697 if (name == NULL) name = BAD_CAST "unparsable";
8698 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8699 "Opening and ending tag mismatch: %s line %d and %s\n",
8700 ctxt->name, line, name);
8701 }
8702
8703 /*
8704 * SAX: End of Tag
8705 */
8706 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8707 (!ctxt->disableSAX))
8708 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8709
8710 namePop(ctxt);
8711 spacePop(ctxt);
8712 return;
8713 }
8714
8715 /**
8716 * xmlParseEndTag:
8717 * @ctxt: an XML parser context
8718 *
8719 * parse an end of tag
8720 *
8721 * [42] ETag ::= '</' Name S? '>'
8722 *
8723 * With namespace
8724 *
8725 * [NS 9] ETag ::= '</' QName S? '>'
8726 */
8727
8728 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8729 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8730 xmlParseEndTag1(ctxt, 0);
8731 }
8732 #endif /* LIBXML_SAX1_ENABLED */
8733
8734 /************************************************************************
8735 * *
8736 * SAX 2 specific operations *
8737 * *
8738 ************************************************************************/
8739
8740 /*
8741 * xmlGetNamespace:
8742 * @ctxt: an XML parser context
8743 * @prefix: the prefix to lookup
8744 *
8745 * Lookup the namespace name for the @prefix (which ca be NULL)
8746 * The prefix must come from the @ctxt->dict dictionary
8747 *
8748 * Returns the namespace name or NULL if not bound
8749 */
8750 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8751 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8752 int i;
8753
8754 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8755 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8756 if (ctxt->nsTab[i] == prefix) {
8757 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8758 return(NULL);
8759 return(ctxt->nsTab[i + 1]);
8760 }
8761 return(NULL);
8762 }
8763
8764 /**
8765 * xmlParseQName:
8766 * @ctxt: an XML parser context
8767 * @prefix: pointer to store the prefix part
8768 *
8769 * parse an XML Namespace QName
8770 *
8771 * [6] QName ::= (Prefix ':')? LocalPart
8772 * [7] Prefix ::= NCName
8773 * [8] LocalPart ::= NCName
8774 *
8775 * Returns the Name parsed or NULL
8776 */
8777
8778 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8779 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8780 const xmlChar *l, *p;
8781
8782 GROW;
8783
8784 l = xmlParseNCName(ctxt);
8785 if (l == NULL) {
8786 if (CUR == ':') {
8787 l = xmlParseName(ctxt);
8788 if (l != NULL) {
8789 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8790 "Failed to parse QName '%s'\n", l, NULL, NULL);
8791 *prefix = NULL;
8792 return(l);
8793 }
8794 }
8795 return(NULL);
8796 }
8797 if (CUR == ':') {
8798 NEXT;
8799 p = l;
8800 l = xmlParseNCName(ctxt);
8801 if (l == NULL) {
8802 xmlChar *tmp;
8803
8804 if (ctxt->instate == XML_PARSER_EOF)
8805 return(NULL);
8806 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8807 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8808 l = xmlParseNmtoken(ctxt);
8809 if (l == NULL) {
8810 if (ctxt->instate == XML_PARSER_EOF)
8811 return(NULL);
8812 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8813 } else {
8814 tmp = xmlBuildQName(l, p, NULL, 0);
8815 xmlFree((char *)l);
8816 }
8817 p = xmlDictLookup(ctxt->dict, tmp, -1);
8818 if (tmp != NULL) xmlFree(tmp);
8819 *prefix = NULL;
8820 return(p);
8821 }
8822 if (CUR == ':') {
8823 xmlChar *tmp;
8824
8825 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8826 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8827 NEXT;
8828 tmp = (xmlChar *) xmlParseName(ctxt);
8829 if (tmp != NULL) {
8830 tmp = xmlBuildQName(tmp, l, NULL, 0);
8831 l = xmlDictLookup(ctxt->dict, tmp, -1);
8832 if (tmp != NULL) xmlFree(tmp);
8833 *prefix = p;
8834 return(l);
8835 }
8836 if (ctxt->instate == XML_PARSER_EOF)
8837 return(NULL);
8838 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8839 l = xmlDictLookup(ctxt->dict, tmp, -1);
8840 if (tmp != NULL) xmlFree(tmp);
8841 *prefix = p;
8842 return(l);
8843 }
8844 *prefix = p;
8845 } else
8846 *prefix = NULL;
8847 return(l);
8848 }
8849
8850 /**
8851 * xmlParseQNameAndCompare:
8852 * @ctxt: an XML parser context
8853 * @name: the localname
8854 * @prefix: the prefix, if any.
8855 *
8856 * parse an XML name and compares for match
8857 * (specialized for endtag parsing)
8858 *
8859 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8860 * and the name for mismatch
8861 */
8862
8863 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8864 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8865 xmlChar const *prefix) {
8866 const xmlChar *cmp;
8867 const xmlChar *in;
8868 const xmlChar *ret;
8869 const xmlChar *prefix2;
8870
8871 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8872
8873 GROW;
8874 in = ctxt->input->cur;
8875
8876 cmp = prefix;
8877 while (*in != 0 && *in == *cmp) {
8878 ++in;
8879 ++cmp;
8880 }
8881 if ((*cmp == 0) && (*in == ':')) {
8882 in++;
8883 cmp = name;
8884 while (*in != 0 && *in == *cmp) {
8885 ++in;
8886 ++cmp;
8887 }
8888 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8889 /* success */
8890 ctxt->input->col += in - ctxt->input->cur;
8891 ctxt->input->cur = in;
8892 return((const xmlChar*) 1);
8893 }
8894 }
8895 /*
8896 * all strings coms from the dictionary, equality can be done directly
8897 */
8898 ret = xmlParseQName (ctxt, &prefix2);
8899 if ((ret == name) && (prefix == prefix2))
8900 return((const xmlChar*) 1);
8901 return ret;
8902 }
8903
8904 /**
8905 * xmlParseAttValueInternal:
8906 * @ctxt: an XML parser context
8907 * @len: attribute len result
8908 * @alloc: whether the attribute was reallocated as a new string
8909 * @normalize: if 1 then further non-CDATA normalization must be done
8910 *
8911 * parse a value for an attribute.
8912 * NOTE: if no normalization is needed, the routine will return pointers
8913 * directly from the data buffer.
8914 *
8915 * 3.3.3 Attribute-Value Normalization:
8916 * Before the value of an attribute is passed to the application or
8917 * checked for validity, the XML processor must normalize it as follows:
8918 * - a character reference is processed by appending the referenced
8919 * character to the attribute value
8920 * - an entity reference is processed by recursively processing the
8921 * replacement text of the entity
8922 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8923 * appending #x20 to the normalized value, except that only a single
8924 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8925 * parsed entity or the literal entity value of an internal parsed entity
8926 * - other characters are processed by appending them to the normalized value
8927 * If the declared value is not CDATA, then the XML processor must further
8928 * process the normalized attribute value by discarding any leading and
8929 * trailing space (#x20) characters, and by replacing sequences of space
8930 * (#x20) characters by a single space (#x20) character.
8931 * All attributes for which no declaration has been read should be treated
8932 * by a non-validating parser as if declared CDATA.
8933 *
8934 * Returns the AttValue parsed or NULL. The value has to be freed by the
8935 * caller if it was copied, this can be detected by val[*len] == 0.
8936 */
8937
8938 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8939 const xmlChar *oldbase = ctxt->input->base;\
8940 GROW;\
8941 if (ctxt->instate == XML_PARSER_EOF)\
8942 return(NULL);\
8943 if (oldbase != ctxt->input->base) {\
8944 ptrdiff_t delta = ctxt->input->base - oldbase;\
8945 start = start + delta;\
8946 in = in + delta;\
8947 }\
8948 end = ctxt->input->end;
8949
8950 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8951 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8952 int normalize)
8953 {
8954 xmlChar limit = 0;
8955 const xmlChar *in = NULL, *start, *end, *last;
8956 xmlChar *ret = NULL;
8957 int line, col;
8958
8959 GROW;
8960 in = (xmlChar *) CUR_PTR;
8961 line = ctxt->input->line;
8962 col = ctxt->input->col;
8963 if (*in != '"' && *in != '\'') {
8964 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8965 return (NULL);
8966 }
8967 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8968
8969 /*
8970 * try to handle in this routine the most common case where no
8971 * allocation of a new string is required and where content is
8972 * pure ASCII.
8973 */
8974 limit = *in++;
8975 col++;
8976 end = ctxt->input->end;
8977 start = in;
8978 if (in >= end) {
8979 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8980 }
8981 if (normalize) {
8982 /*
8983 * Skip any leading spaces
8984 */
8985 while ((in < end) && (*in != limit) &&
8986 ((*in == 0x20) || (*in == 0x9) ||
8987 (*in == 0xA) || (*in == 0xD))) {
8988 if (*in == 0xA) {
8989 line++; col = 1;
8990 } else {
8991 col++;
8992 }
8993 in++;
8994 start = in;
8995 if (in >= end) {
8996 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8997 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9000 "AttValue length too long\n");
9001 return(NULL);
9002 }
9003 }
9004 }
9005 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9006 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9007 col++;
9008 if ((*in++ == 0x20) && (*in == 0x20)) break;
9009 if (in >= end) {
9010 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9011 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9012 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9013 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9014 "AttValue length too long\n");
9015 return(NULL);
9016 }
9017 }
9018 }
9019 last = in;
9020 /*
9021 * skip the trailing blanks
9022 */
9023 while ((last[-1] == 0x20) && (last > start)) last--;
9024 while ((in < end) && (*in != limit) &&
9025 ((*in == 0x20) || (*in == 0x9) ||
9026 (*in == 0xA) || (*in == 0xD))) {
9027 if (*in == 0xA) {
9028 line++, col = 1;
9029 } else {
9030 col++;
9031 }
9032 in++;
9033 if (in >= end) {
9034 const xmlChar *oldbase = ctxt->input->base;
9035 GROW;
9036 if (ctxt->instate == XML_PARSER_EOF)
9037 return(NULL);
9038 if (oldbase != ctxt->input->base) {
9039 ptrdiff_t delta = ctxt->input->base - oldbase;
9040 start = start + delta;
9041 in = in + delta;
9042 last = last + delta;
9043 }
9044 end = ctxt->input->end;
9045 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9046 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9047 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9048 "AttValue length too long\n");
9049 return(NULL);
9050 }
9051 }
9052 }
9053 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9054 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9055 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9056 "AttValue length too long\n");
9057 return(NULL);
9058 }
9059 if (*in != limit) goto need_complex;
9060 } else {
9061 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9062 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9063 in++;
9064 col++;
9065 if (in >= end) {
9066 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9067 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9068 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9069 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9070 "AttValue length too long\n");
9071 return(NULL);
9072 }
9073 }
9074 }
9075 last = in;
9076 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9077 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9078 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9079 "AttValue length too long\n");
9080 return(NULL);
9081 }
9082 if (*in != limit) goto need_complex;
9083 }
9084 in++;
9085 col++;
9086 if (len != NULL) {
9087 *len = last - start;
9088 ret = (xmlChar *) start;
9089 } else {
9090 if (alloc) *alloc = 1;
9091 ret = xmlStrndup(start, last - start);
9092 }
9093 CUR_PTR = in;
9094 ctxt->input->line = line;
9095 ctxt->input->col = col;
9096 if (alloc) *alloc = 0;
9097 return ret;
9098 need_complex:
9099 if (alloc) *alloc = 1;
9100 return xmlParseAttValueComplex(ctxt, len, normalize);
9101 }
9102
9103 /**
9104 * xmlParseAttribute2:
9105 * @ctxt: an XML parser context
9106 * @pref: the element prefix
9107 * @elem: the element name
9108 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9109 * @value: a xmlChar ** used to store the value of the attribute
9110 * @len: an int * to save the length of the attribute
9111 * @alloc: an int * to indicate if the attribute was allocated
9112 *
9113 * parse an attribute in the new SAX2 framework.
9114 *
9115 * Returns the attribute name, and the value in *value, .
9116 */
9117
9118 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9119 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9120 const xmlChar * pref, const xmlChar * elem,
9121 const xmlChar ** prefix, xmlChar ** value,
9122 int *len, int *alloc)
9123 {
9124 const xmlChar *name;
9125 xmlChar *val, *internal_val = NULL;
9126 int normalize = 0;
9127
9128 *value = NULL;
9129 GROW;
9130 name = xmlParseQName(ctxt, prefix);
9131 if (name == NULL) {
9132 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9133 "error parsing attribute name\n");
9134 return (NULL);
9135 }
9136
9137 /*
9138 * get the type if needed
9139 */
9140 if (ctxt->attsSpecial != NULL) {
9141 int type;
9142
9143 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9144 pref, elem, *prefix, name);
9145 if (type != 0)
9146 normalize = 1;
9147 }
9148
9149 /*
9150 * read the value
9151 */
9152 SKIP_BLANKS;
9153 if (RAW == '=') {
9154 NEXT;
9155 SKIP_BLANKS;
9156 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9157 if (normalize) {
9158 /*
9159 * Sometimes a second normalisation pass for spaces is needed
9160 * but that only happens if charrefs or entities references
9161 * have been used in the attribute value, i.e. the attribute
9162 * value have been extracted in an allocated string already.
9163 */
9164 if (*alloc) {
9165 const xmlChar *val2;
9166
9167 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9168 if ((val2 != NULL) && (val2 != val)) {
9169 xmlFree(val);
9170 val = (xmlChar *) val2;
9171 }
9172 }
9173 }
9174 ctxt->instate = XML_PARSER_CONTENT;
9175 } else {
9176 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9177 "Specification mandates value for attribute %s\n",
9178 name);
9179 return (NULL);
9180 }
9181
9182 if (*prefix == ctxt->str_xml) {
9183 /*
9184 * Check that xml:lang conforms to the specification
9185 * No more registered as an error, just generate a warning now
9186 * since this was deprecated in XML second edition
9187 */
9188 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9189 internal_val = xmlStrndup(val, *len);
9190 if (!xmlCheckLanguageID(internal_val)) {
9191 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9192 "Malformed value for xml:lang : %s\n",
9193 internal_val, NULL);
9194 }
9195 }
9196
9197 /*
9198 * Check that xml:space conforms to the specification
9199 */
9200 if (xmlStrEqual(name, BAD_CAST "space")) {
9201 internal_val = xmlStrndup(val, *len);
9202 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9203 *(ctxt->space) = 0;
9204 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9205 *(ctxt->space) = 1;
9206 else {
9207 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9208 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9209 internal_val, NULL);
9210 }
9211 }
9212 if (internal_val) {
9213 xmlFree(internal_val);
9214 }
9215 }
9216
9217 *value = val;
9218 return (name);
9219 }
9220 /**
9221 * xmlParseStartTag2:
9222 * @ctxt: an XML parser context
9223 *
9224 * parse a start of tag either for rule element or
9225 * EmptyElement. In both case we don't parse the tag closing chars.
9226 * This routine is called when running SAX2 parsing
9227 *
9228 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9229 *
9230 * [ WFC: Unique Att Spec ]
9231 * No attribute name may appear more than once in the same start-tag or
9232 * empty-element tag.
9233 *
9234 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9235 *
9236 * [ WFC: Unique Att Spec ]
9237 * No attribute name may appear more than once in the same start-tag or
9238 * empty-element tag.
9239 *
9240 * With namespace:
9241 *
9242 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9243 *
9244 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9245 *
9246 * Returns the element name parsed
9247 */
9248
9249 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9250 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9251 const xmlChar **URI, int *tlen) {
9252 const xmlChar *localname;
9253 const xmlChar *prefix;
9254 const xmlChar *attname;
9255 const xmlChar *aprefix;
9256 const xmlChar *nsname;
9257 xmlChar *attvalue;
9258 const xmlChar **atts = ctxt->atts;
9259 int maxatts = ctxt->maxatts;
9260 int nratts, nbatts, nbdef, inputid;
9261 int i, j, nbNs, attval;
9262 unsigned long cur;
9263 int nsNr = ctxt->nsNr;
9264
9265 if (RAW != '<') return(NULL);
9266 NEXT1;
9267
9268 /*
9269 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9270 * point since the attribute values may be stored as pointers to
9271 * the buffer and calling SHRINK would destroy them !
9272 * The Shrinking is only possible once the full set of attribute
9273 * callbacks have been done.
9274 */
9275 SHRINK;
9276 cur = ctxt->input->cur - ctxt->input->base;
9277 inputid = ctxt->input->id;
9278 nbatts = 0;
9279 nratts = 0;
9280 nbdef = 0;
9281 nbNs = 0;
9282 attval = 0;
9283 /* Forget any namespaces added during an earlier parse of this element. */
9284 ctxt->nsNr = nsNr;
9285
9286 localname = xmlParseQName(ctxt, &prefix);
9287 if (localname == NULL) {
9288 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9289 "StartTag: invalid element name\n");
9290 return(NULL);
9291 }
9292 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9293
9294 /*
9295 * Now parse the attributes, it ends up with the ending
9296 *
9297 * (S Attribute)* S?
9298 */
9299 SKIP_BLANKS;
9300 GROW;
9301
9302 while (((RAW != '>') &&
9303 ((RAW != '/') || (NXT(1) != '>')) &&
9304 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9305 const xmlChar *q = CUR_PTR;
9306 unsigned int cons = ctxt->input->consumed;
9307 int len = -1, alloc = 0;
9308
9309 attname = xmlParseAttribute2(ctxt, prefix, localname,
9310 &aprefix, &attvalue, &len, &alloc);
9311 if ((attname == NULL) || (attvalue == NULL))
9312 goto next_attr;
9313 if (len < 0) len = xmlStrlen(attvalue);
9314
9315 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9316 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9317 xmlURIPtr uri;
9318
9319 if (URL == NULL) {
9320 xmlErrMemory(ctxt, "dictionary allocation failure");
9321 if ((attvalue != NULL) && (alloc != 0))
9322 xmlFree(attvalue);
9323 localname = NULL;
9324 goto done;
9325 }
9326 if (*URL != 0) {
9327 uri = xmlParseURI((const char *) URL);
9328 if (uri == NULL) {
9329 xmlNsErr(ctxt, XML_WAR_NS_URI,
9330 "xmlns: '%s' is not a valid URI\n",
9331 URL, NULL, NULL);
9332 } else {
9333 if (uri->scheme == NULL) {
9334 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9335 "xmlns: URI %s is not absolute\n",
9336 URL, NULL, NULL);
9337 }
9338 xmlFreeURI(uri);
9339 }
9340 if (URL == ctxt->str_xml_ns) {
9341 if (attname != ctxt->str_xml) {
9342 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9343 "xml namespace URI cannot be the default namespace\n",
9344 NULL, NULL, NULL);
9345 }
9346 goto next_attr;
9347 }
9348 if ((len == 29) &&
9349 (xmlStrEqual(URL,
9350 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9351 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9352 "reuse of the xmlns namespace name is forbidden\n",
9353 NULL, NULL, NULL);
9354 goto next_attr;
9355 }
9356 }
9357 /*
9358 * check that it's not a defined namespace
9359 */
9360 for (j = 1;j <= nbNs;j++)
9361 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9362 break;
9363 if (j <= nbNs)
9364 xmlErrAttributeDup(ctxt, NULL, attname);
9365 else
9366 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9367
9368 } else if (aprefix == ctxt->str_xmlns) {
9369 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9370 xmlURIPtr uri;
9371
9372 if (attname == ctxt->str_xml) {
9373 if (URL != ctxt->str_xml_ns) {
9374 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9375 "xml namespace prefix mapped to wrong URI\n",
9376 NULL, NULL, NULL);
9377 }
9378 /*
9379 * Do not keep a namespace definition node
9380 */
9381 goto next_attr;
9382 }
9383 if (URL == ctxt->str_xml_ns) {
9384 if (attname != ctxt->str_xml) {
9385 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9386 "xml namespace URI mapped to wrong prefix\n",
9387 NULL, NULL, NULL);
9388 }
9389 goto next_attr;
9390 }
9391 if (attname == ctxt->str_xmlns) {
9392 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9393 "redefinition of the xmlns prefix is forbidden\n",
9394 NULL, NULL, NULL);
9395 goto next_attr;
9396 }
9397 if ((len == 29) &&
9398 (xmlStrEqual(URL,
9399 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9400 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9401 "reuse of the xmlns namespace name is forbidden\n",
9402 NULL, NULL, NULL);
9403 goto next_attr;
9404 }
9405 if ((URL == NULL) || (URL[0] == 0)) {
9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407 "xmlns:%s: Empty XML namespace is not allowed\n",
9408 attname, NULL, NULL);
9409 goto next_attr;
9410 } else {
9411 uri = xmlParseURI((const char *) URL);
9412 if (uri == NULL) {
9413 xmlNsErr(ctxt, XML_WAR_NS_URI,
9414 "xmlns:%s: '%s' is not a valid URI\n",
9415 attname, URL, NULL);
9416 } else {
9417 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9418 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9419 "xmlns:%s: URI %s is not absolute\n",
9420 attname, URL, NULL);
9421 }
9422 xmlFreeURI(uri);
9423 }
9424 }
9425
9426 /*
9427 * check that it's not a defined namespace
9428 */
9429 for (j = 1;j <= nbNs;j++)
9430 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9431 break;
9432 if (j <= nbNs)
9433 xmlErrAttributeDup(ctxt, aprefix, attname);
9434 else
9435 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9436
9437 } else {
9438 /*
9439 * Add the pair to atts
9440 */
9441 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9442 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9443 goto next_attr;
9444 }
9445 maxatts = ctxt->maxatts;
9446 atts = ctxt->atts;
9447 }
9448 ctxt->attallocs[nratts++] = alloc;
9449 atts[nbatts++] = attname;
9450 atts[nbatts++] = aprefix;
9451 /*
9452 * The namespace URI field is used temporarily to point at the
9453 * base of the current input buffer for non-alloced attributes.
9454 * When the input buffer is reallocated, all the pointers become
9455 * invalid, but they can be reconstructed later.
9456 */
9457 if (alloc)
9458 atts[nbatts++] = NULL;
9459 else
9460 atts[nbatts++] = ctxt->input->base;
9461 atts[nbatts++] = attvalue;
9462 attvalue += len;
9463 atts[nbatts++] = attvalue;
9464 /*
9465 * tag if some deallocation is needed
9466 */
9467 if (alloc != 0) attval = 1;
9468 attvalue = NULL; /* moved into atts */
9469 }
9470
9471 next_attr:
9472 if ((attvalue != NULL) && (alloc != 0)) {
9473 xmlFree(attvalue);
9474 attvalue = NULL;
9475 }
9476
9477 GROW
9478 if (ctxt->instate == XML_PARSER_EOF)
9479 break;
9480 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9481 break;
9482 if (SKIP_BLANKS == 0) {
9483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9484 "attributes construct error\n");
9485 break;
9486 }
9487 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9488 (attname == NULL) && (attvalue == NULL)) {
9489 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9490 "xmlParseStartTag: problem parsing attributes\n");
9491 break;
9492 }
9493 GROW;
9494 }
9495
9496 if (ctxt->input->id != inputid) {
9497 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9498 "Unexpected change of input\n");
9499 localname = NULL;
9500 goto done;
9501 }
9502
9503 /* Reconstruct attribute value pointers. */
9504 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9505 if (atts[i+2] != NULL) {
9506 /*
9507 * Arithmetic on dangling pointers is technically undefined
9508 * behavior, but well...
9509 */
9510 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9511 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9512 atts[i+3] += offset; /* value */
9513 atts[i+4] += offset; /* valuend */
9514 }
9515 }
9516
9517 /*
9518 * The attributes defaulting
9519 */
9520 if (ctxt->attsDefault != NULL) {
9521 xmlDefAttrsPtr defaults;
9522
9523 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9524 if (defaults != NULL) {
9525 for (i = 0;i < defaults->nbAttrs;i++) {
9526 attname = defaults->values[5 * i];
9527 aprefix = defaults->values[5 * i + 1];
9528
9529 /*
9530 * special work for namespaces defaulted defs
9531 */
9532 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9533 /*
9534 * check that it's not a defined namespace
9535 */
9536 for (j = 1;j <= nbNs;j++)
9537 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9538 break;
9539 if (j <= nbNs) continue;
9540
9541 nsname = xmlGetNamespace(ctxt, NULL);
9542 if (nsname != defaults->values[5 * i + 2]) {
9543 if (nsPush(ctxt, NULL,
9544 defaults->values[5 * i + 2]) > 0)
9545 nbNs++;
9546 }
9547 } else if (aprefix == ctxt->str_xmlns) {
9548 /*
9549 * check that it's not a defined namespace
9550 */
9551 for (j = 1;j <= nbNs;j++)
9552 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9553 break;
9554 if (j <= nbNs) continue;
9555
9556 nsname = xmlGetNamespace(ctxt, attname);
9557 if (nsname != defaults->values[2]) {
9558 if (nsPush(ctxt, attname,
9559 defaults->values[5 * i + 2]) > 0)
9560 nbNs++;
9561 }
9562 } else {
9563 /*
9564 * check that it's not a defined attribute
9565 */
9566 for (j = 0;j < nbatts;j+=5) {
9567 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9568 break;
9569 }
9570 if (j < nbatts) continue;
9571
9572 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9573 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9574 localname = NULL;
9575 goto done;
9576 }
9577 maxatts = ctxt->maxatts;
9578 atts = ctxt->atts;
9579 }
9580 atts[nbatts++] = attname;
9581 atts[nbatts++] = aprefix;
9582 if (aprefix == NULL)
9583 atts[nbatts++] = NULL;
9584 else
9585 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9586 atts[nbatts++] = defaults->values[5 * i + 2];
9587 atts[nbatts++] = defaults->values[5 * i + 3];
9588 if ((ctxt->standalone == 1) &&
9589 (defaults->values[5 * i + 4] != NULL)) {
9590 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9591 "standalone: attribute %s on %s defaulted from external subset\n",
9592 attname, localname);
9593 }
9594 nbdef++;
9595 }
9596 }
9597 }
9598 }
9599
9600 /*
9601 * The attributes checkings
9602 */
9603 for (i = 0; i < nbatts;i += 5) {
9604 /*
9605 * The default namespace does not apply to attribute names.
9606 */
9607 if (atts[i + 1] != NULL) {
9608 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9609 if (nsname == NULL) {
9610 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9611 "Namespace prefix %s for %s on %s is not defined\n",
9612 atts[i + 1], atts[i], localname);
9613 }
9614 atts[i + 2] = nsname;
9615 } else
9616 nsname = NULL;
9617 /*
9618 * [ WFC: Unique Att Spec ]
9619 * No attribute name may appear more than once in the same
9620 * start-tag or empty-element tag.
9621 * As extended by the Namespace in XML REC.
9622 */
9623 for (j = 0; j < i;j += 5) {
9624 if (atts[i] == atts[j]) {
9625 if (atts[i+1] == atts[j+1]) {
9626 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9627 break;
9628 }
9629 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9630 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9631 "Namespaced Attribute %s in '%s' redefined\n",
9632 atts[i], nsname, NULL);
9633 break;
9634 }
9635 }
9636 }
9637 }
9638
9639 nsname = xmlGetNamespace(ctxt, prefix);
9640 if ((prefix != NULL) && (nsname == NULL)) {
9641 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9642 "Namespace prefix %s on %s is not defined\n",
9643 prefix, localname, NULL);
9644 }
9645 *pref = prefix;
9646 *URI = nsname;
9647
9648 /*
9649 * SAX: Start of Element !
9650 */
9651 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9652 (!ctxt->disableSAX)) {
9653 if (nbNs > 0)
9654 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9655 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9656 nbatts / 5, nbdef, atts);
9657 else
9658 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9659 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9660 }
9661
9662 done:
9663 /*
9664 * Free up attribute allocated strings if needed
9665 */
9666 if (attval != 0) {
9667 for (i = 3,j = 0; j < nratts;i += 5,j++)
9668 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9669 xmlFree((xmlChar *) atts[i]);
9670 }
9671
9672 return(localname);
9673 }
9674
9675 /**
9676 * xmlParseEndTag2:
9677 * @ctxt: an XML parser context
9678 * @line: line of the start tag
9679 * @nsNr: number of namespaces on the start tag
9680 *
9681 * parse an end of tag
9682 *
9683 * [42] ETag ::= '</' Name S? '>'
9684 *
9685 * With namespace
9686 *
9687 * [NS 9] ETag ::= '</' QName S? '>'
9688 */
9689
9690 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9691 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9692 const xmlChar *name;
9693
9694 GROW;
9695 if ((RAW != '<') || (NXT(1) != '/')) {
9696 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9697 return;
9698 }
9699 SKIP(2);
9700
9701 if (tag->prefix == NULL)
9702 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9703 else
9704 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9705
9706 /*
9707 * We should definitely be at the ending "S? '>'" part
9708 */
9709 GROW;
9710 if (ctxt->instate == XML_PARSER_EOF)
9711 return;
9712 SKIP_BLANKS;
9713 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9714 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9715 } else
9716 NEXT1;
9717
9718 /*
9719 * [ WFC: Element Type Match ]
9720 * The Name in an element's end-tag must match the element type in the
9721 * start-tag.
9722 *
9723 */
9724 if (name != (xmlChar*)1) {
9725 if (name == NULL) name = BAD_CAST "unparsable";
9726 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9727 "Opening and ending tag mismatch: %s line %d and %s\n",
9728 ctxt->name, tag->line, name);
9729 }
9730
9731 /*
9732 * SAX: End of Tag
9733 */
9734 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9735 (!ctxt->disableSAX))
9736 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9737 tag->URI);
9738
9739 spacePop(ctxt);
9740 if (tag->nsNr != 0)
9741 nsPop(ctxt, tag->nsNr);
9742 }
9743
9744 /**
9745 * xmlParseCDSect:
9746 * @ctxt: an XML parser context
9747 *
9748 * Parse escaped pure raw content.
9749 *
9750 * [18] CDSect ::= CDStart CData CDEnd
9751 *
9752 * [19] CDStart ::= '<![CDATA['
9753 *
9754 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9755 *
9756 * [21] CDEnd ::= ']]>'
9757 */
9758 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9759 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9760 xmlChar *buf = NULL;
9761 int len = 0;
9762 int size = XML_PARSER_BUFFER_SIZE;
9763 int r, rl;
9764 int s, sl;
9765 int cur, l;
9766 int count = 0;
9767
9768 /* Check 2.6.0 was NXT(0) not RAW */
9769 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9770 SKIP(9);
9771 } else
9772 return;
9773
9774 ctxt->instate = XML_PARSER_CDATA_SECTION;
9775 r = CUR_CHAR(rl);
9776 if (!IS_CHAR(r)) {
9777 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9778 ctxt->instate = XML_PARSER_CONTENT;
9779 return;
9780 }
9781 NEXTL(rl);
9782 s = CUR_CHAR(sl);
9783 if (!IS_CHAR(s)) {
9784 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9785 ctxt->instate = XML_PARSER_CONTENT;
9786 return;
9787 }
9788 NEXTL(sl);
9789 cur = CUR_CHAR(l);
9790 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9791 if (buf == NULL) {
9792 xmlErrMemory(ctxt, NULL);
9793 return;
9794 }
9795 while (IS_CHAR(cur) &&
9796 ((r != ']') || (s != ']') || (cur != '>'))) {
9797 if (len + 5 >= size) {
9798 xmlChar *tmp;
9799
9800 if ((size > XML_MAX_TEXT_LENGTH) &&
9801 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9802 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9803 "CData section too big found", NULL);
9804 xmlFree (buf);
9805 return;
9806 }
9807 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9808 if (tmp == NULL) {
9809 xmlFree(buf);
9810 xmlErrMemory(ctxt, NULL);
9811 return;
9812 }
9813 buf = tmp;
9814 size *= 2;
9815 }
9816 COPY_BUF(rl,buf,len,r);
9817 r = s;
9818 rl = sl;
9819 s = cur;
9820 sl = l;
9821 count++;
9822 if (count > 50) {
9823 SHRINK;
9824 GROW;
9825 if (ctxt->instate == XML_PARSER_EOF) {
9826 xmlFree(buf);
9827 return;
9828 }
9829 count = 0;
9830 }
9831 NEXTL(l);
9832 cur = CUR_CHAR(l);
9833 }
9834 buf[len] = 0;
9835 ctxt->instate = XML_PARSER_CONTENT;
9836 if (cur != '>') {
9837 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9838 "CData section not finished\n%.50s\n", buf);
9839 xmlFree(buf);
9840 return;
9841 }
9842 NEXTL(l);
9843
9844 /*
9845 * OK the buffer is to be consumed as cdata.
9846 */
9847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9848 if (ctxt->sax->cdataBlock != NULL)
9849 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9850 else if (ctxt->sax->characters != NULL)
9851 ctxt->sax->characters(ctxt->userData, buf, len);
9852 }
9853 xmlFree(buf);
9854 }
9855
9856 /**
9857 * xmlParseContentInternal:
9858 * @ctxt: an XML parser context
9859 *
9860 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9861 * unexpected EOF to the caller.
9862 */
9863
9864 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9865 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9866 int nameNr = ctxt->nameNr;
9867
9868 GROW;
9869 while ((RAW != 0) &&
9870 (ctxt->instate != XML_PARSER_EOF)) {
9871 const xmlChar *test = CUR_PTR;
9872 unsigned int cons = ctxt->input->consumed;
9873 const xmlChar *cur = ctxt->input->cur;
9874
9875 /*
9876 * First case : a Processing Instruction.
9877 */
9878 if ((*cur == '<') && (cur[1] == '?')) {
9879 xmlParsePI(ctxt);
9880 }
9881
9882 /*
9883 * Second case : a CDSection
9884 */
9885 /* 2.6.0 test was *cur not RAW */
9886 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9887 xmlParseCDSect(ctxt);
9888 }
9889
9890 /*
9891 * Third case : a comment
9892 */
9893 else if ((*cur == '<') && (NXT(1) == '!') &&
9894 (NXT(2) == '-') && (NXT(3) == '-')) {
9895 xmlParseComment(ctxt);
9896 ctxt->instate = XML_PARSER_CONTENT;
9897 }
9898
9899 /*
9900 * Fourth case : a sub-element.
9901 */
9902 else if (*cur == '<') {
9903 if (NXT(1) == '/') {
9904 if (ctxt->nameNr <= nameNr)
9905 break;
9906 xmlParseElementEnd(ctxt);
9907 } else {
9908 xmlParseElementStart(ctxt);
9909 }
9910 }
9911
9912 /*
9913 * Fifth case : a reference. If if has not been resolved,
9914 * parsing returns it's Name, create the node
9915 */
9916
9917 else if (*cur == '&') {
9918 xmlParseReference(ctxt);
9919 }
9920
9921 /*
9922 * Last case, text. Note that References are handled directly.
9923 */
9924 else {
9925 xmlParseCharData(ctxt, 0);
9926 }
9927
9928 GROW;
9929 SHRINK;
9930
9931 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9932 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9933 "detected an error in element content\n");
9934 xmlHaltParser(ctxt);
9935 break;
9936 }
9937 }
9938 }
9939
9940 /**
9941 * xmlParseContent:
9942 * @ctxt: an XML parser context
9943 *
9944 * Parse a content sequence. Stops at EOF or '</'.
9945 *
9946 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9947 */
9948
9949 void
xmlParseContent(xmlParserCtxtPtr ctxt)9950 xmlParseContent(xmlParserCtxtPtr ctxt) {
9951 int nameNr = ctxt->nameNr;
9952
9953 xmlParseContentInternal(ctxt);
9954
9955 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9956 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9957 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9958 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9959 "Premature end of data in tag %s line %d\n",
9960 name, line, NULL);
9961 }
9962 }
9963
9964 /**
9965 * xmlParseElement:
9966 * @ctxt: an XML parser context
9967 *
9968 * parse an XML element
9969 *
9970 * [39] element ::= EmptyElemTag | STag content ETag
9971 *
9972 * [ WFC: Element Type Match ]
9973 * The Name in an element's end-tag must match the element type in the
9974 * start-tag.
9975 *
9976 */
9977
9978 void
xmlParseElement(xmlParserCtxtPtr ctxt)9979 xmlParseElement(xmlParserCtxtPtr ctxt) {
9980 if (xmlParseElementStart(ctxt) != 0)
9981 return;
9982
9983 xmlParseContentInternal(ctxt);
9984 if (ctxt->instate == XML_PARSER_EOF)
9985 return;
9986
9987 if (CUR == 0) {
9988 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9989 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9990 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9991 "Premature end of data in tag %s line %d\n",
9992 name, line, NULL);
9993 return;
9994 }
9995
9996 xmlParseElementEnd(ctxt);
9997 }
9998
9999 /**
10000 * xmlParseElementStart:
10001 * @ctxt: an XML parser context
10002 *
10003 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10004 * opening tag was parsed, 1 if an empty element was parsed.
10005 */
10006 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10007 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10008 const xmlChar *name;
10009 const xmlChar *prefix = NULL;
10010 const xmlChar *URI = NULL;
10011 xmlParserNodeInfo node_info;
10012 int line, tlen = 0;
10013 xmlNodePtr ret;
10014 int nsNr = ctxt->nsNr;
10015
10016 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10017 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10018 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10019 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10020 xmlParserMaxDepth);
10021 xmlHaltParser(ctxt);
10022 return(-1);
10023 }
10024
10025 /* Capture start position */
10026 if (ctxt->record_info) {
10027 node_info.begin_pos = ctxt->input->consumed +
10028 (CUR_PTR - ctxt->input->base);
10029 node_info.begin_line = ctxt->input->line;
10030 }
10031
10032 if (ctxt->spaceNr == 0)
10033 spacePush(ctxt, -1);
10034 else if (*ctxt->space == -2)
10035 spacePush(ctxt, -1);
10036 else
10037 spacePush(ctxt, *ctxt->space);
10038
10039 line = ctxt->input->line;
10040 #ifdef LIBXML_SAX1_ENABLED
10041 if (ctxt->sax2)
10042 #endif /* LIBXML_SAX1_ENABLED */
10043 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10044 #ifdef LIBXML_SAX1_ENABLED
10045 else
10046 name = xmlParseStartTag(ctxt);
10047 #endif /* LIBXML_SAX1_ENABLED */
10048 if (ctxt->instate == XML_PARSER_EOF)
10049 return(-1);
10050 if (name == NULL) {
10051 spacePop(ctxt);
10052 return(-1);
10053 }
10054 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10055 ret = ctxt->node;
10056
10057 #ifdef LIBXML_VALID_ENABLED
10058 /*
10059 * [ VC: Root Element Type ]
10060 * The Name in the document type declaration must match the element
10061 * type of the root element.
10062 */
10063 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10064 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10065 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10066 #endif /* LIBXML_VALID_ENABLED */
10067
10068 /*
10069 * Check for an Empty Element.
10070 */
10071 if ((RAW == '/') && (NXT(1) == '>')) {
10072 SKIP(2);
10073 if (ctxt->sax2) {
10074 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10075 (!ctxt->disableSAX))
10076 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10077 #ifdef LIBXML_SAX1_ENABLED
10078 } else {
10079 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10080 (!ctxt->disableSAX))
10081 ctxt->sax->endElement(ctxt->userData, name);
10082 #endif /* LIBXML_SAX1_ENABLED */
10083 }
10084 namePop(ctxt);
10085 spacePop(ctxt);
10086 if (nsNr != ctxt->nsNr)
10087 nsPop(ctxt, ctxt->nsNr - nsNr);
10088 if ( ret != NULL && ctxt->record_info ) {
10089 node_info.end_pos = ctxt->input->consumed +
10090 (CUR_PTR - ctxt->input->base);
10091 node_info.end_line = ctxt->input->line;
10092 node_info.node = ret;
10093 xmlParserAddNodeInfo(ctxt, &node_info);
10094 }
10095 return(1);
10096 }
10097 if (RAW == '>') {
10098 NEXT1;
10099 } else {
10100 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10101 "Couldn't find end of Start Tag %s line %d\n",
10102 name, line, NULL);
10103
10104 /*
10105 * end of parsing of this node.
10106 */
10107 nodePop(ctxt);
10108 namePop(ctxt);
10109 spacePop(ctxt);
10110 if (nsNr != ctxt->nsNr)
10111 nsPop(ctxt, ctxt->nsNr - nsNr);
10112
10113 /*
10114 * Capture end position and add node
10115 */
10116 if ( ret != NULL && ctxt->record_info ) {
10117 node_info.end_pos = ctxt->input->consumed +
10118 (CUR_PTR - ctxt->input->base);
10119 node_info.end_line = ctxt->input->line;
10120 node_info.node = ret;
10121 xmlParserAddNodeInfo(ctxt, &node_info);
10122 }
10123 return(-1);
10124 }
10125
10126 return(0);
10127 }
10128
10129 /**
10130 * xmlParseElementEnd:
10131 * @ctxt: an XML parser context
10132 *
10133 * Parse the end of an XML element.
10134 */
10135 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10136 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10137 xmlParserNodeInfo node_info;
10138 xmlNodePtr ret = ctxt->node;
10139
10140 if (ctxt->nameNr <= 0)
10141 return;
10142
10143 /*
10144 * parse the end of tag: '</' should be here.
10145 */
10146 if (ctxt->sax2) {
10147 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10148 namePop(ctxt);
10149 }
10150 #ifdef LIBXML_SAX1_ENABLED
10151 else
10152 xmlParseEndTag1(ctxt, 0);
10153 #endif /* LIBXML_SAX1_ENABLED */
10154
10155 /*
10156 * Capture end position and add node
10157 */
10158 if ( ret != NULL && ctxt->record_info ) {
10159 node_info.end_pos = ctxt->input->consumed +
10160 (CUR_PTR - ctxt->input->base);
10161 node_info.end_line = ctxt->input->line;
10162 node_info.node = ret;
10163 xmlParserAddNodeInfo(ctxt, &node_info);
10164 }
10165 }
10166
10167 /**
10168 * xmlParseVersionNum:
10169 * @ctxt: an XML parser context
10170 *
10171 * parse the XML version value.
10172 *
10173 * [26] VersionNum ::= '1.' [0-9]+
10174 *
10175 * In practice allow [0-9].[0-9]+ at that level
10176 *
10177 * Returns the string giving the XML version number, or NULL
10178 */
10179 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10180 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10181 xmlChar *buf = NULL;
10182 int len = 0;
10183 int size = 10;
10184 xmlChar cur;
10185
10186 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10187 if (buf == NULL) {
10188 xmlErrMemory(ctxt, NULL);
10189 return(NULL);
10190 }
10191 cur = CUR;
10192 if (!((cur >= '0') && (cur <= '9'))) {
10193 xmlFree(buf);
10194 return(NULL);
10195 }
10196 buf[len++] = cur;
10197 NEXT;
10198 cur=CUR;
10199 if (cur != '.') {
10200 xmlFree(buf);
10201 return(NULL);
10202 }
10203 buf[len++] = cur;
10204 NEXT;
10205 cur=CUR;
10206 while ((cur >= '0') && (cur <= '9')) {
10207 if (len + 1 >= size) {
10208 xmlChar *tmp;
10209
10210 size *= 2;
10211 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10212 if (tmp == NULL) {
10213 xmlFree(buf);
10214 xmlErrMemory(ctxt, NULL);
10215 return(NULL);
10216 }
10217 buf = tmp;
10218 }
10219 buf[len++] = cur;
10220 NEXT;
10221 cur=CUR;
10222 }
10223 buf[len] = 0;
10224 return(buf);
10225 }
10226
10227 /**
10228 * xmlParseVersionInfo:
10229 * @ctxt: an XML parser context
10230 *
10231 * parse the XML version.
10232 *
10233 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10234 *
10235 * [25] Eq ::= S? '=' S?
10236 *
10237 * Returns the version string, e.g. "1.0"
10238 */
10239
10240 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10241 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10242 xmlChar *version = NULL;
10243
10244 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10245 SKIP(7);
10246 SKIP_BLANKS;
10247 if (RAW != '=') {
10248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10249 return(NULL);
10250 }
10251 NEXT;
10252 SKIP_BLANKS;
10253 if (RAW == '"') {
10254 NEXT;
10255 version = xmlParseVersionNum(ctxt);
10256 if (RAW != '"') {
10257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10258 } else
10259 NEXT;
10260 } else if (RAW == '\''){
10261 NEXT;
10262 version = xmlParseVersionNum(ctxt);
10263 if (RAW != '\'') {
10264 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10265 } else
10266 NEXT;
10267 } else {
10268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10269 }
10270 }
10271 return(version);
10272 }
10273
10274 /**
10275 * xmlParseEncName:
10276 * @ctxt: an XML parser context
10277 *
10278 * parse the XML encoding name
10279 *
10280 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10281 *
10282 * Returns the encoding name value or NULL
10283 */
10284 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10285 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10286 xmlChar *buf = NULL;
10287 int len = 0;
10288 int size = 10;
10289 xmlChar cur;
10290
10291 cur = CUR;
10292 if (((cur >= 'a') && (cur <= 'z')) ||
10293 ((cur >= 'A') && (cur <= 'Z'))) {
10294 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10295 if (buf == NULL) {
10296 xmlErrMemory(ctxt, NULL);
10297 return(NULL);
10298 }
10299
10300 buf[len++] = cur;
10301 NEXT;
10302 cur = CUR;
10303 while (((cur >= 'a') && (cur <= 'z')) ||
10304 ((cur >= 'A') && (cur <= 'Z')) ||
10305 ((cur >= '0') && (cur <= '9')) ||
10306 (cur == '.') || (cur == '_') ||
10307 (cur == '-')) {
10308 if (len + 1 >= size) {
10309 xmlChar *tmp;
10310
10311 size *= 2;
10312 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10313 if (tmp == NULL) {
10314 xmlErrMemory(ctxt, NULL);
10315 xmlFree(buf);
10316 return(NULL);
10317 }
10318 buf = tmp;
10319 }
10320 buf[len++] = cur;
10321 NEXT;
10322 cur = CUR;
10323 if (cur == 0) {
10324 SHRINK;
10325 GROW;
10326 cur = CUR;
10327 }
10328 }
10329 buf[len] = 0;
10330 } else {
10331 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10332 }
10333 return(buf);
10334 }
10335
10336 /**
10337 * xmlParseEncodingDecl:
10338 * @ctxt: an XML parser context
10339 *
10340 * parse the XML encoding declaration
10341 *
10342 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10343 *
10344 * this setups the conversion filters.
10345 *
10346 * Returns the encoding value or NULL
10347 */
10348
10349 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10350 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10351 xmlChar *encoding = NULL;
10352
10353 SKIP_BLANKS;
10354 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10355 SKIP(8);
10356 SKIP_BLANKS;
10357 if (RAW != '=') {
10358 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10359 return(NULL);
10360 }
10361 NEXT;
10362 SKIP_BLANKS;
10363 if (RAW == '"') {
10364 NEXT;
10365 encoding = xmlParseEncName(ctxt);
10366 if (RAW != '"') {
10367 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10368 xmlFree((xmlChar *) encoding);
10369 return(NULL);
10370 } else
10371 NEXT;
10372 } else if (RAW == '\''){
10373 NEXT;
10374 encoding = xmlParseEncName(ctxt);
10375 if (RAW != '\'') {
10376 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10377 xmlFree((xmlChar *) encoding);
10378 return(NULL);
10379 } else
10380 NEXT;
10381 } else {
10382 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10383 }
10384
10385 /*
10386 * Non standard parsing, allowing the user to ignore encoding
10387 */
10388 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10389 xmlFree((xmlChar *) encoding);
10390 return(NULL);
10391 }
10392
10393 /*
10394 * UTF-16 encoding switch has already taken place at this stage,
10395 * more over the little-endian/big-endian selection is already done
10396 */
10397 if ((encoding != NULL) &&
10398 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10399 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10400 /*
10401 * If no encoding was passed to the parser, that we are
10402 * using UTF-16 and no decoder is present i.e. the
10403 * document is apparently UTF-8 compatible, then raise an
10404 * encoding mismatch fatal error
10405 */
10406 if ((ctxt->encoding == NULL) &&
10407 (ctxt->input->buf != NULL) &&
10408 (ctxt->input->buf->encoder == NULL)) {
10409 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10410 "Document labelled UTF-16 but has UTF-8 content\n");
10411 }
10412 if (ctxt->encoding != NULL)
10413 xmlFree((xmlChar *) ctxt->encoding);
10414 ctxt->encoding = encoding;
10415 }
10416 /*
10417 * UTF-8 encoding is handled natively
10418 */
10419 else if ((encoding != NULL) &&
10420 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10421 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10422 if (ctxt->encoding != NULL)
10423 xmlFree((xmlChar *) ctxt->encoding);
10424 ctxt->encoding = encoding;
10425 }
10426 else if (encoding != NULL) {
10427 xmlCharEncodingHandlerPtr handler;
10428
10429 if (ctxt->input->encoding != NULL)
10430 xmlFree((xmlChar *) ctxt->input->encoding);
10431 ctxt->input->encoding = encoding;
10432
10433 handler = xmlFindCharEncodingHandler((const char *) encoding);
10434 if (handler != NULL) {
10435 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10436 /* failed to convert */
10437 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10438 return(NULL);
10439 }
10440 } else {
10441 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10442 "Unsupported encoding %s\n", encoding);
10443 return(NULL);
10444 }
10445 }
10446 }
10447 return(encoding);
10448 }
10449
10450 /**
10451 * xmlParseSDDecl:
10452 * @ctxt: an XML parser context
10453 *
10454 * parse the XML standalone declaration
10455 *
10456 * [32] SDDecl ::= S 'standalone' Eq
10457 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10458 *
10459 * [ VC: Standalone Document Declaration ]
10460 * TODO The standalone document declaration must have the value "no"
10461 * if any external markup declarations contain declarations of:
10462 * - attributes with default values, if elements to which these
10463 * attributes apply appear in the document without specifications
10464 * of values for these attributes, or
10465 * - entities (other than amp, lt, gt, apos, quot), if references
10466 * to those entities appear in the document, or
10467 * - attributes with values subject to normalization, where the
10468 * attribute appears in the document with a value which will change
10469 * as a result of normalization, or
10470 * - element types with element content, if white space occurs directly
10471 * within any instance of those types.
10472 *
10473 * Returns:
10474 * 1 if standalone="yes"
10475 * 0 if standalone="no"
10476 * -2 if standalone attribute is missing or invalid
10477 * (A standalone value of -2 means that the XML declaration was found,
10478 * but no value was specified for the standalone attribute).
10479 */
10480
10481 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10482 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10483 int standalone = -2;
10484
10485 SKIP_BLANKS;
10486 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10487 SKIP(10);
10488 SKIP_BLANKS;
10489 if (RAW != '=') {
10490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10491 return(standalone);
10492 }
10493 NEXT;
10494 SKIP_BLANKS;
10495 if (RAW == '\''){
10496 NEXT;
10497 if ((RAW == 'n') && (NXT(1) == 'o')) {
10498 standalone = 0;
10499 SKIP(2);
10500 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10501 (NXT(2) == 's')) {
10502 standalone = 1;
10503 SKIP(3);
10504 } else {
10505 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10506 }
10507 if (RAW != '\'') {
10508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10509 } else
10510 NEXT;
10511 } else if (RAW == '"'){
10512 NEXT;
10513 if ((RAW == 'n') && (NXT(1) == 'o')) {
10514 standalone = 0;
10515 SKIP(2);
10516 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10517 (NXT(2) == 's')) {
10518 standalone = 1;
10519 SKIP(3);
10520 } else {
10521 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10522 }
10523 if (RAW != '"') {
10524 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10525 } else
10526 NEXT;
10527 } else {
10528 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10529 }
10530 }
10531 return(standalone);
10532 }
10533
10534 /**
10535 * xmlParseXMLDecl:
10536 * @ctxt: an XML parser context
10537 *
10538 * parse an XML declaration header
10539 *
10540 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10541 */
10542
10543 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10544 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10545 xmlChar *version;
10546
10547 /*
10548 * This value for standalone indicates that the document has an
10549 * XML declaration but it does not have a standalone attribute.
10550 * It will be overwritten later if a standalone attribute is found.
10551 */
10552 ctxt->input->standalone = -2;
10553
10554 /*
10555 * We know that '<?xml' is here.
10556 */
10557 SKIP(5);
10558
10559 if (!IS_BLANK_CH(RAW)) {
10560 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10561 "Blank needed after '<?xml'\n");
10562 }
10563 SKIP_BLANKS;
10564
10565 /*
10566 * We must have the VersionInfo here.
10567 */
10568 version = xmlParseVersionInfo(ctxt);
10569 if (version == NULL) {
10570 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10571 } else {
10572 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10573 /*
10574 * Changed here for XML-1.0 5th edition
10575 */
10576 if (ctxt->options & XML_PARSE_OLD10) {
10577 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10578 "Unsupported version '%s'\n",
10579 version);
10580 } else {
10581 if ((version[0] == '1') && ((version[1] == '.'))) {
10582 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10583 "Unsupported version '%s'\n",
10584 version, NULL);
10585 } else {
10586 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10587 "Unsupported version '%s'\n",
10588 version);
10589 }
10590 }
10591 }
10592 if (ctxt->version != NULL)
10593 xmlFree((void *) ctxt->version);
10594 ctxt->version = version;
10595 }
10596
10597 /*
10598 * We may have the encoding declaration
10599 */
10600 if (!IS_BLANK_CH(RAW)) {
10601 if ((RAW == '?') && (NXT(1) == '>')) {
10602 SKIP(2);
10603 return;
10604 }
10605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10606 }
10607 xmlParseEncodingDecl(ctxt);
10608 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10609 (ctxt->instate == XML_PARSER_EOF)) {
10610 /*
10611 * The XML REC instructs us to stop parsing right here
10612 */
10613 return;
10614 }
10615
10616 /*
10617 * We may have the standalone status.
10618 */
10619 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10620 if ((RAW == '?') && (NXT(1) == '>')) {
10621 SKIP(2);
10622 return;
10623 }
10624 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10625 }
10626
10627 /*
10628 * We can grow the input buffer freely at that point
10629 */
10630 GROW;
10631
10632 SKIP_BLANKS;
10633 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10634
10635 SKIP_BLANKS;
10636 if ((RAW == '?') && (NXT(1) == '>')) {
10637 SKIP(2);
10638 } else if (RAW == '>') {
10639 /* Deprecated old WD ... */
10640 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10641 NEXT;
10642 } else {
10643 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10644 MOVETO_ENDTAG(CUR_PTR);
10645 NEXT;
10646 }
10647 }
10648
10649 /**
10650 * xmlParseMisc:
10651 * @ctxt: an XML parser context
10652 *
10653 * parse an XML Misc* optional field.
10654 *
10655 * [27] Misc ::= Comment | PI | S
10656 */
10657
10658 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10659 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10660 while ((ctxt->instate != XML_PARSER_EOF) &&
10661 (((RAW == '<') && (NXT(1) == '?')) ||
10662 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10663 IS_BLANK_CH(CUR))) {
10664 if ((RAW == '<') && (NXT(1) == '?')) {
10665 xmlParsePI(ctxt);
10666 } else if (IS_BLANK_CH(CUR)) {
10667 NEXT;
10668 } else
10669 xmlParseComment(ctxt);
10670 }
10671 }
10672
10673 /**
10674 * xmlParseDocument:
10675 * @ctxt: an XML parser context
10676 *
10677 * parse an XML document (and build a tree if using the standard SAX
10678 * interface).
10679 *
10680 * [1] document ::= prolog element Misc*
10681 *
10682 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10683 *
10684 * Returns 0, -1 in case of error. the parser context is augmented
10685 * as a result of the parsing.
10686 */
10687
10688 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10689 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10690 xmlChar start[4];
10691 xmlCharEncoding enc;
10692
10693 xmlInitParser();
10694
10695 if ((ctxt == NULL) || (ctxt->input == NULL))
10696 return(-1);
10697
10698 GROW;
10699
10700 /*
10701 * SAX: detecting the level.
10702 */
10703 xmlDetectSAX2(ctxt);
10704
10705 /*
10706 * SAX: beginning of the document processing.
10707 */
10708 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10709 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10710 if (ctxt->instate == XML_PARSER_EOF)
10711 return(-1);
10712
10713 if ((ctxt->encoding == NULL) &&
10714 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10715 /*
10716 * Get the 4 first bytes and decode the charset
10717 * if enc != XML_CHAR_ENCODING_NONE
10718 * plug some encoding conversion routines.
10719 */
10720 start[0] = RAW;
10721 start[1] = NXT(1);
10722 start[2] = NXT(2);
10723 start[3] = NXT(3);
10724 enc = xmlDetectCharEncoding(&start[0], 4);
10725 if (enc != XML_CHAR_ENCODING_NONE) {
10726 xmlSwitchEncoding(ctxt, enc);
10727 }
10728 }
10729
10730
10731 if (CUR == 0) {
10732 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10733 return(-1);
10734 }
10735
10736 /*
10737 * Check for the XMLDecl in the Prolog.
10738 * do not GROW here to avoid the detected encoder to decode more
10739 * than just the first line, unless the amount of data is really
10740 * too small to hold "<?xml version="1.0" encoding="foo"
10741 */
10742 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10743 GROW;
10744 }
10745 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10746
10747 /*
10748 * Note that we will switch encoding on the fly.
10749 */
10750 xmlParseXMLDecl(ctxt);
10751 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10752 (ctxt->instate == XML_PARSER_EOF)) {
10753 /*
10754 * The XML REC instructs us to stop parsing right here
10755 */
10756 return(-1);
10757 }
10758 ctxt->standalone = ctxt->input->standalone;
10759 SKIP_BLANKS;
10760 } else {
10761 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10762 }
10763 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10764 ctxt->sax->startDocument(ctxt->userData);
10765 if (ctxt->instate == XML_PARSER_EOF)
10766 return(-1);
10767 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10768 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10769 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10770 }
10771
10772 /*
10773 * The Misc part of the Prolog
10774 */
10775 GROW;
10776 xmlParseMisc(ctxt);
10777
10778 /*
10779 * Then possibly doc type declaration(s) and more Misc
10780 * (doctypedecl Misc*)?
10781 */
10782 GROW;
10783 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10784
10785 ctxt->inSubset = 1;
10786 xmlParseDocTypeDecl(ctxt);
10787 if (RAW == '[') {
10788 ctxt->instate = XML_PARSER_DTD;
10789 xmlParseInternalSubset(ctxt);
10790 if (ctxt->instate == XML_PARSER_EOF)
10791 return(-1);
10792 }
10793
10794 /*
10795 * Create and update the external subset.
10796 */
10797 ctxt->inSubset = 2;
10798 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10799 (!ctxt->disableSAX))
10800 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10801 ctxt->extSubSystem, ctxt->extSubURI);
10802 if (ctxt->instate == XML_PARSER_EOF)
10803 return(-1);
10804 ctxt->inSubset = 0;
10805
10806 xmlCleanSpecialAttr(ctxt);
10807
10808 ctxt->instate = XML_PARSER_PROLOG;
10809 xmlParseMisc(ctxt);
10810 }
10811
10812 /*
10813 * Time to start parsing the tree itself
10814 */
10815 GROW;
10816 if (RAW != '<') {
10817 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10818 "Start tag expected, '<' not found\n");
10819 } else {
10820 ctxt->instate = XML_PARSER_CONTENT;
10821 xmlParseElement(ctxt);
10822 ctxt->instate = XML_PARSER_EPILOG;
10823
10824
10825 /*
10826 * The Misc part at the end
10827 */
10828 xmlParseMisc(ctxt);
10829
10830 if (RAW != 0) {
10831 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10832 }
10833 ctxt->instate = XML_PARSER_EOF;
10834 }
10835
10836 /*
10837 * SAX: end of the document processing.
10838 */
10839 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10840 ctxt->sax->endDocument(ctxt->userData);
10841
10842 /*
10843 * Remove locally kept entity definitions if the tree was not built
10844 */
10845 if ((ctxt->myDoc != NULL) &&
10846 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10847 xmlFreeDoc(ctxt->myDoc);
10848 ctxt->myDoc = NULL;
10849 }
10850
10851 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10852 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10853 if (ctxt->valid)
10854 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10855 if (ctxt->nsWellFormed)
10856 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10857 if (ctxt->options & XML_PARSE_OLD10)
10858 ctxt->myDoc->properties |= XML_DOC_OLD10;
10859 }
10860 if (! ctxt->wellFormed) {
10861 ctxt->valid = 0;
10862 return(-1);
10863 }
10864 return(0);
10865 }
10866
10867 /**
10868 * xmlParseExtParsedEnt:
10869 * @ctxt: an XML parser context
10870 *
10871 * parse a general parsed entity
10872 * An external general parsed entity is well-formed if it matches the
10873 * production labeled extParsedEnt.
10874 *
10875 * [78] extParsedEnt ::= TextDecl? content
10876 *
10877 * Returns 0, -1 in case of error. the parser context is augmented
10878 * as a result of the parsing.
10879 */
10880
10881 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10882 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10883 xmlChar start[4];
10884 xmlCharEncoding enc;
10885
10886 if ((ctxt == NULL) || (ctxt->input == NULL))
10887 return(-1);
10888
10889 xmlDefaultSAXHandlerInit();
10890
10891 xmlDetectSAX2(ctxt);
10892
10893 GROW;
10894
10895 /*
10896 * SAX: beginning of the document processing.
10897 */
10898 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10899 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10900
10901 /*
10902 * Get the 4 first bytes and decode the charset
10903 * if enc != XML_CHAR_ENCODING_NONE
10904 * plug some encoding conversion routines.
10905 */
10906 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10907 start[0] = RAW;
10908 start[1] = NXT(1);
10909 start[2] = NXT(2);
10910 start[3] = NXT(3);
10911 enc = xmlDetectCharEncoding(start, 4);
10912 if (enc != XML_CHAR_ENCODING_NONE) {
10913 xmlSwitchEncoding(ctxt, enc);
10914 }
10915 }
10916
10917
10918 if (CUR == 0) {
10919 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10920 }
10921
10922 /*
10923 * Check for the XMLDecl in the Prolog.
10924 */
10925 GROW;
10926 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10927
10928 /*
10929 * Note that we will switch encoding on the fly.
10930 */
10931 xmlParseXMLDecl(ctxt);
10932 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10933 /*
10934 * The XML REC instructs us to stop parsing right here
10935 */
10936 return(-1);
10937 }
10938 SKIP_BLANKS;
10939 } else {
10940 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10941 }
10942 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10943 ctxt->sax->startDocument(ctxt->userData);
10944 if (ctxt->instate == XML_PARSER_EOF)
10945 return(-1);
10946
10947 /*
10948 * Doing validity checking on chunk doesn't make sense
10949 */
10950 ctxt->instate = XML_PARSER_CONTENT;
10951 ctxt->validate = 0;
10952 ctxt->loadsubset = 0;
10953 ctxt->depth = 0;
10954
10955 xmlParseContent(ctxt);
10956 if (ctxt->instate == XML_PARSER_EOF)
10957 return(-1);
10958
10959 if ((RAW == '<') && (NXT(1) == '/')) {
10960 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10961 } else if (RAW != 0) {
10962 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10963 }
10964
10965 /*
10966 * SAX: end of the document processing.
10967 */
10968 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10969 ctxt->sax->endDocument(ctxt->userData);
10970
10971 if (! ctxt->wellFormed) return(-1);
10972 return(0);
10973 }
10974
10975 #ifdef LIBXML_PUSH_ENABLED
10976 /************************************************************************
10977 * *
10978 * Progressive parsing interfaces *
10979 * *
10980 ************************************************************************/
10981
10982 /**
10983 * xmlParseLookupSequence:
10984 * @ctxt: an XML parser context
10985 * @first: the first char to lookup
10986 * @next: the next char to lookup or zero
10987 * @third: the next char to lookup or zero
10988 *
10989 * Try to find if a sequence (first, next, third) or just (first next) or
10990 * (first) is available in the input stream.
10991 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10992 * to avoid rescanning sequences of bytes, it DOES change the state of the
10993 * parser, do not use liberally.
10994 *
10995 * Returns the index to the current parsing point if the full sequence
10996 * is available, -1 otherwise.
10997 */
10998 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10999 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11000 xmlChar next, xmlChar third) {
11001 int base, len;
11002 xmlParserInputPtr in;
11003 const xmlChar *buf;
11004
11005 in = ctxt->input;
11006 if (in == NULL) return(-1);
11007 base = in->cur - in->base;
11008 if (base < 0) return(-1);
11009 if (ctxt->checkIndex > base)
11010 base = ctxt->checkIndex;
11011 if (in->buf == NULL) {
11012 buf = in->base;
11013 len = in->length;
11014 } else {
11015 buf = xmlBufContent(in->buf->buffer);
11016 len = xmlBufUse(in->buf->buffer);
11017 }
11018 /* take into account the sequence length */
11019 if (third) len -= 2;
11020 else if (next) len --;
11021 for (;base < len;base++) {
11022 if (buf[base] == first) {
11023 if (third != 0) {
11024 if ((buf[base + 1] != next) ||
11025 (buf[base + 2] != third)) continue;
11026 } else if (next != 0) {
11027 if (buf[base + 1] != next) continue;
11028 }
11029 ctxt->checkIndex = 0;
11030 #ifdef DEBUG_PUSH
11031 if (next == 0)
11032 xmlGenericError(xmlGenericErrorContext,
11033 "PP: lookup '%c' found at %d\n",
11034 first, base);
11035 else if (third == 0)
11036 xmlGenericError(xmlGenericErrorContext,
11037 "PP: lookup '%c%c' found at %d\n",
11038 first, next, base);
11039 else
11040 xmlGenericError(xmlGenericErrorContext,
11041 "PP: lookup '%c%c%c' found at %d\n",
11042 first, next, third, base);
11043 #endif
11044 return(base - (in->cur - in->base));
11045 }
11046 }
11047 ctxt->checkIndex = base;
11048 #ifdef DEBUG_PUSH
11049 if (next == 0)
11050 xmlGenericError(xmlGenericErrorContext,
11051 "PP: lookup '%c' failed\n", first);
11052 else if (third == 0)
11053 xmlGenericError(xmlGenericErrorContext,
11054 "PP: lookup '%c%c' failed\n", first, next);
11055 else
11056 xmlGenericError(xmlGenericErrorContext,
11057 "PP: lookup '%c%c%c' failed\n", first, next, third);
11058 #endif
11059 return(-1);
11060 }
11061
11062 /**
11063 * xmlParseGetLasts:
11064 * @ctxt: an XML parser context
11065 * @lastlt: pointer to store the last '<' from the input
11066 * @lastgt: pointer to store the last '>' from the input
11067 *
11068 * Lookup the last < and > in the current chunk
11069 */
11070 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11071 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11072 const xmlChar **lastgt) {
11073 const xmlChar *tmp;
11074
11075 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11076 xmlGenericError(xmlGenericErrorContext,
11077 "Internal error: xmlParseGetLasts\n");
11078 return;
11079 }
11080 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11081 tmp = ctxt->input->end;
11082 tmp--;
11083 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11084 if (tmp < ctxt->input->base) {
11085 *lastlt = NULL;
11086 *lastgt = NULL;
11087 } else {
11088 *lastlt = tmp;
11089 tmp++;
11090 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11091 if (*tmp == '\'') {
11092 tmp++;
11093 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11094 if (tmp < ctxt->input->end) tmp++;
11095 } else if (*tmp == '"') {
11096 tmp++;
11097 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11098 if (tmp < ctxt->input->end) tmp++;
11099 } else
11100 tmp++;
11101 }
11102 if (tmp < ctxt->input->end)
11103 *lastgt = tmp;
11104 else {
11105 tmp = *lastlt;
11106 tmp--;
11107 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11108 if (tmp >= ctxt->input->base)
11109 *lastgt = tmp;
11110 else
11111 *lastgt = NULL;
11112 }
11113 }
11114 } else {
11115 *lastlt = NULL;
11116 *lastgt = NULL;
11117 }
11118 }
11119 /**
11120 * xmlCheckCdataPush:
11121 * @cur: pointer to the block of characters
11122 * @len: length of the block in bytes
11123 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11124 *
11125 * Check that the block of characters is okay as SCdata content [20]
11126 *
11127 * Returns the number of bytes to pass if okay, a negative index where an
11128 * UTF-8 error occurred otherwise
11129 */
11130 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11131 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11132 int ix;
11133 unsigned char c;
11134 int codepoint;
11135
11136 if ((utf == NULL) || (len <= 0))
11137 return(0);
11138
11139 for (ix = 0; ix < len;) { /* string is 0-terminated */
11140 c = utf[ix];
11141 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11142 if (c >= 0x20)
11143 ix++;
11144 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11145 ix++;
11146 else
11147 return(-ix);
11148 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11149 if (ix + 2 > len) return(complete ? -ix : ix);
11150 if ((utf[ix+1] & 0xc0 ) != 0x80)
11151 return(-ix);
11152 codepoint = (utf[ix] & 0x1f) << 6;
11153 codepoint |= utf[ix+1] & 0x3f;
11154 if (!xmlIsCharQ(codepoint))
11155 return(-ix);
11156 ix += 2;
11157 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11158 if (ix + 3 > len) return(complete ? -ix : ix);
11159 if (((utf[ix+1] & 0xc0) != 0x80) ||
11160 ((utf[ix+2] & 0xc0) != 0x80))
11161 return(-ix);
11162 codepoint = (utf[ix] & 0xf) << 12;
11163 codepoint |= (utf[ix+1] & 0x3f) << 6;
11164 codepoint |= utf[ix+2] & 0x3f;
11165 if (!xmlIsCharQ(codepoint))
11166 return(-ix);
11167 ix += 3;
11168 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11169 if (ix + 4 > len) return(complete ? -ix : ix);
11170 if (((utf[ix+1] & 0xc0) != 0x80) ||
11171 ((utf[ix+2] & 0xc0) != 0x80) ||
11172 ((utf[ix+3] & 0xc0) != 0x80))
11173 return(-ix);
11174 codepoint = (utf[ix] & 0x7) << 18;
11175 codepoint |= (utf[ix+1] & 0x3f) << 12;
11176 codepoint |= (utf[ix+2] & 0x3f) << 6;
11177 codepoint |= utf[ix+3] & 0x3f;
11178 if (!xmlIsCharQ(codepoint))
11179 return(-ix);
11180 ix += 4;
11181 } else /* unknown encoding */
11182 return(-ix);
11183 }
11184 return(ix);
11185 }
11186
11187 /**
11188 * xmlParseTryOrFinish:
11189 * @ctxt: an XML parser context
11190 * @terminate: last chunk indicator
11191 *
11192 * Try to progress on parsing
11193 *
11194 * Returns zero if no parsing was possible
11195 */
11196 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11197 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11198 int ret = 0;
11199 int avail, tlen;
11200 xmlChar cur, next;
11201 const xmlChar *lastlt, *lastgt;
11202
11203 if (ctxt->input == NULL)
11204 return(0);
11205
11206 #ifdef DEBUG_PUSH
11207 switch (ctxt->instate) {
11208 case XML_PARSER_EOF:
11209 xmlGenericError(xmlGenericErrorContext,
11210 "PP: try EOF\n"); break;
11211 case XML_PARSER_START:
11212 xmlGenericError(xmlGenericErrorContext,
11213 "PP: try START\n"); break;
11214 case XML_PARSER_MISC:
11215 xmlGenericError(xmlGenericErrorContext,
11216 "PP: try MISC\n");break;
11217 case XML_PARSER_COMMENT:
11218 xmlGenericError(xmlGenericErrorContext,
11219 "PP: try COMMENT\n");break;
11220 case XML_PARSER_PROLOG:
11221 xmlGenericError(xmlGenericErrorContext,
11222 "PP: try PROLOG\n");break;
11223 case XML_PARSER_START_TAG:
11224 xmlGenericError(xmlGenericErrorContext,
11225 "PP: try START_TAG\n");break;
11226 case XML_PARSER_CONTENT:
11227 xmlGenericError(xmlGenericErrorContext,
11228 "PP: try CONTENT\n");break;
11229 case XML_PARSER_CDATA_SECTION:
11230 xmlGenericError(xmlGenericErrorContext,
11231 "PP: try CDATA_SECTION\n");break;
11232 case XML_PARSER_END_TAG:
11233 xmlGenericError(xmlGenericErrorContext,
11234 "PP: try END_TAG\n");break;
11235 case XML_PARSER_ENTITY_DECL:
11236 xmlGenericError(xmlGenericErrorContext,
11237 "PP: try ENTITY_DECL\n");break;
11238 case XML_PARSER_ENTITY_VALUE:
11239 xmlGenericError(xmlGenericErrorContext,
11240 "PP: try ENTITY_VALUE\n");break;
11241 case XML_PARSER_ATTRIBUTE_VALUE:
11242 xmlGenericError(xmlGenericErrorContext,
11243 "PP: try ATTRIBUTE_VALUE\n");break;
11244 case XML_PARSER_DTD:
11245 xmlGenericError(xmlGenericErrorContext,
11246 "PP: try DTD\n");break;
11247 case XML_PARSER_EPILOG:
11248 xmlGenericError(xmlGenericErrorContext,
11249 "PP: try EPILOG\n");break;
11250 case XML_PARSER_PI:
11251 xmlGenericError(xmlGenericErrorContext,
11252 "PP: try PI\n");break;
11253 case XML_PARSER_IGNORE:
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: try IGNORE\n");break;
11256 }
11257 #endif
11258
11259 if ((ctxt->input != NULL) &&
11260 (ctxt->input->cur - ctxt->input->base > 4096)) {
11261 xmlSHRINK(ctxt);
11262 ctxt->checkIndex = 0;
11263 }
11264 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11265
11266 while (ctxt->instate != XML_PARSER_EOF) {
11267 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11268 return(0);
11269
11270 if (ctxt->input == NULL) break;
11271 if (ctxt->input->buf == NULL)
11272 avail = ctxt->input->length -
11273 (ctxt->input->cur - ctxt->input->base);
11274 else {
11275 /*
11276 * If we are operating on converted input, try to flush
11277 * remaining chars to avoid them stalling in the non-converted
11278 * buffer. But do not do this in document start where
11279 * encoding="..." may not have been read and we work on a
11280 * guessed encoding.
11281 */
11282 if ((ctxt->instate != XML_PARSER_START) &&
11283 (ctxt->input->buf->raw != NULL) &&
11284 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11285 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11286 ctxt->input);
11287 size_t current = ctxt->input->cur - ctxt->input->base;
11288
11289 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11290 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11291 base, current);
11292 }
11293 avail = xmlBufUse(ctxt->input->buf->buffer) -
11294 (ctxt->input->cur - ctxt->input->base);
11295 }
11296 if (avail < 1)
11297 goto done;
11298 switch (ctxt->instate) {
11299 case XML_PARSER_EOF:
11300 /*
11301 * Document parsing is done !
11302 */
11303 goto done;
11304 case XML_PARSER_START:
11305 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11306 xmlChar start[4];
11307 xmlCharEncoding enc;
11308
11309 /*
11310 * Very first chars read from the document flow.
11311 */
11312 if (avail < 4)
11313 goto done;
11314
11315 /*
11316 * Get the 4 first bytes and decode the charset
11317 * if enc != XML_CHAR_ENCODING_NONE
11318 * plug some encoding conversion routines,
11319 * else xmlSwitchEncoding will set to (default)
11320 * UTF8.
11321 */
11322 start[0] = RAW;
11323 start[1] = NXT(1);
11324 start[2] = NXT(2);
11325 start[3] = NXT(3);
11326 enc = xmlDetectCharEncoding(start, 4);
11327 xmlSwitchEncoding(ctxt, enc);
11328 break;
11329 }
11330
11331 if (avail < 2)
11332 goto done;
11333 cur = ctxt->input->cur[0];
11334 next = ctxt->input->cur[1];
11335 if (cur == 0) {
11336 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11337 ctxt->sax->setDocumentLocator(ctxt->userData,
11338 &xmlDefaultSAXLocator);
11339 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11340 xmlHaltParser(ctxt);
11341 #ifdef DEBUG_PUSH
11342 xmlGenericError(xmlGenericErrorContext,
11343 "PP: entering EOF\n");
11344 #endif
11345 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11346 ctxt->sax->endDocument(ctxt->userData);
11347 goto done;
11348 }
11349 if ((cur == '<') && (next == '?')) {
11350 /* PI or XML decl */
11351 if (avail < 5) return(ret);
11352 if ((!terminate) &&
11353 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11354 return(ret);
11355 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11356 ctxt->sax->setDocumentLocator(ctxt->userData,
11357 &xmlDefaultSAXLocator);
11358 if ((ctxt->input->cur[2] == 'x') &&
11359 (ctxt->input->cur[3] == 'm') &&
11360 (ctxt->input->cur[4] == 'l') &&
11361 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11362 ret += 5;
11363 #ifdef DEBUG_PUSH
11364 xmlGenericError(xmlGenericErrorContext,
11365 "PP: Parsing XML Decl\n");
11366 #endif
11367 xmlParseXMLDecl(ctxt);
11368 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11369 /*
11370 * The XML REC instructs us to stop parsing right
11371 * here
11372 */
11373 xmlHaltParser(ctxt);
11374 return(0);
11375 }
11376 ctxt->standalone = ctxt->input->standalone;
11377 if ((ctxt->encoding == NULL) &&
11378 (ctxt->input->encoding != NULL))
11379 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11380 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11381 (!ctxt->disableSAX))
11382 ctxt->sax->startDocument(ctxt->userData);
11383 ctxt->instate = XML_PARSER_MISC;
11384 #ifdef DEBUG_PUSH
11385 xmlGenericError(xmlGenericErrorContext,
11386 "PP: entering MISC\n");
11387 #endif
11388 } else {
11389 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11390 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11391 (!ctxt->disableSAX))
11392 ctxt->sax->startDocument(ctxt->userData);
11393 ctxt->instate = XML_PARSER_MISC;
11394 #ifdef DEBUG_PUSH
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: entering MISC\n");
11397 #endif
11398 }
11399 } else {
11400 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11401 ctxt->sax->setDocumentLocator(ctxt->userData,
11402 &xmlDefaultSAXLocator);
11403 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11404 if (ctxt->version == NULL) {
11405 xmlErrMemory(ctxt, NULL);
11406 break;
11407 }
11408 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11409 (!ctxt->disableSAX))
11410 ctxt->sax->startDocument(ctxt->userData);
11411 ctxt->instate = XML_PARSER_MISC;
11412 #ifdef DEBUG_PUSH
11413 xmlGenericError(xmlGenericErrorContext,
11414 "PP: entering MISC\n");
11415 #endif
11416 }
11417 break;
11418 case XML_PARSER_START_TAG: {
11419 const xmlChar *name;
11420 const xmlChar *prefix = NULL;
11421 const xmlChar *URI = NULL;
11422 int line = ctxt->input->line;
11423 int nsNr = ctxt->nsNr;
11424
11425 if ((avail < 2) && (ctxt->inputNr == 1))
11426 goto done;
11427 cur = ctxt->input->cur[0];
11428 if (cur != '<') {
11429 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11430 xmlHaltParser(ctxt);
11431 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11432 ctxt->sax->endDocument(ctxt->userData);
11433 goto done;
11434 }
11435 if (!terminate) {
11436 if (ctxt->progressive) {
11437 /* > can be found unescaped in attribute values */
11438 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11439 goto done;
11440 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11441 goto done;
11442 }
11443 }
11444 if (ctxt->spaceNr == 0)
11445 spacePush(ctxt, -1);
11446 else if (*ctxt->space == -2)
11447 spacePush(ctxt, -1);
11448 else
11449 spacePush(ctxt, *ctxt->space);
11450 #ifdef LIBXML_SAX1_ENABLED
11451 if (ctxt->sax2)
11452 #endif /* LIBXML_SAX1_ENABLED */
11453 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11454 #ifdef LIBXML_SAX1_ENABLED
11455 else
11456 name = xmlParseStartTag(ctxt);
11457 #endif /* LIBXML_SAX1_ENABLED */
11458 if (ctxt->instate == XML_PARSER_EOF)
11459 goto done;
11460 if (name == NULL) {
11461 spacePop(ctxt);
11462 xmlHaltParser(ctxt);
11463 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11464 ctxt->sax->endDocument(ctxt->userData);
11465 goto done;
11466 }
11467 #ifdef LIBXML_VALID_ENABLED
11468 /*
11469 * [ VC: Root Element Type ]
11470 * The Name in the document type declaration must match
11471 * the element type of the root element.
11472 */
11473 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11474 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11475 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11476 #endif /* LIBXML_VALID_ENABLED */
11477
11478 /*
11479 * Check for an Empty Element.
11480 */
11481 if ((RAW == '/') && (NXT(1) == '>')) {
11482 SKIP(2);
11483
11484 if (ctxt->sax2) {
11485 if ((ctxt->sax != NULL) &&
11486 (ctxt->sax->endElementNs != NULL) &&
11487 (!ctxt->disableSAX))
11488 ctxt->sax->endElementNs(ctxt->userData, name,
11489 prefix, URI);
11490 if (ctxt->nsNr - nsNr > 0)
11491 nsPop(ctxt, ctxt->nsNr - nsNr);
11492 #ifdef LIBXML_SAX1_ENABLED
11493 } else {
11494 if ((ctxt->sax != NULL) &&
11495 (ctxt->sax->endElement != NULL) &&
11496 (!ctxt->disableSAX))
11497 ctxt->sax->endElement(ctxt->userData, name);
11498 #endif /* LIBXML_SAX1_ENABLED */
11499 }
11500 if (ctxt->instate == XML_PARSER_EOF)
11501 goto done;
11502 spacePop(ctxt);
11503 if (ctxt->nameNr == 0) {
11504 ctxt->instate = XML_PARSER_EPILOG;
11505 } else {
11506 ctxt->instate = XML_PARSER_CONTENT;
11507 }
11508 ctxt->progressive = 1;
11509 break;
11510 }
11511 if (RAW == '>') {
11512 NEXT;
11513 } else {
11514 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11515 "Couldn't find end of Start Tag %s\n",
11516 name);
11517 nodePop(ctxt);
11518 spacePop(ctxt);
11519 }
11520 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11521
11522 ctxt->instate = XML_PARSER_CONTENT;
11523 ctxt->progressive = 1;
11524 break;
11525 }
11526 case XML_PARSER_CONTENT: {
11527 const xmlChar *test;
11528 unsigned int cons;
11529 if ((avail < 2) && (ctxt->inputNr == 1))
11530 goto done;
11531 cur = ctxt->input->cur[0];
11532 next = ctxt->input->cur[1];
11533
11534 test = CUR_PTR;
11535 cons = ctxt->input->consumed;
11536 if ((cur == '<') && (next == '/')) {
11537 ctxt->instate = XML_PARSER_END_TAG;
11538 break;
11539 } else if ((cur == '<') && (next == '?')) {
11540 if ((!terminate) &&
11541 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11542 ctxt->progressive = XML_PARSER_PI;
11543 goto done;
11544 }
11545 xmlParsePI(ctxt);
11546 ctxt->instate = XML_PARSER_CONTENT;
11547 ctxt->progressive = 1;
11548 } else if ((cur == '<') && (next != '!')) {
11549 ctxt->instate = XML_PARSER_START_TAG;
11550 break;
11551 } else if ((cur == '<') && (next == '!') &&
11552 (ctxt->input->cur[2] == '-') &&
11553 (ctxt->input->cur[3] == '-')) {
11554 int term;
11555
11556 if (avail < 4)
11557 goto done;
11558 ctxt->input->cur += 4;
11559 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11560 ctxt->input->cur -= 4;
11561 if ((!terminate) && (term < 0)) {
11562 ctxt->progressive = XML_PARSER_COMMENT;
11563 goto done;
11564 }
11565 xmlParseComment(ctxt);
11566 ctxt->instate = XML_PARSER_CONTENT;
11567 ctxt->progressive = 1;
11568 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11569 (ctxt->input->cur[2] == '[') &&
11570 (ctxt->input->cur[3] == 'C') &&
11571 (ctxt->input->cur[4] == 'D') &&
11572 (ctxt->input->cur[5] == 'A') &&
11573 (ctxt->input->cur[6] == 'T') &&
11574 (ctxt->input->cur[7] == 'A') &&
11575 (ctxt->input->cur[8] == '[')) {
11576 SKIP(9);
11577 ctxt->instate = XML_PARSER_CDATA_SECTION;
11578 break;
11579 } else if ((cur == '<') && (next == '!') &&
11580 (avail < 9)) {
11581 goto done;
11582 } else if (cur == '&') {
11583 if ((!terminate) &&
11584 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11585 goto done;
11586 xmlParseReference(ctxt);
11587 } else {
11588 /* TODO Avoid the extra copy, handle directly !!! */
11589 /*
11590 * Goal of the following test is:
11591 * - minimize calls to the SAX 'character' callback
11592 * when they are mergeable
11593 * - handle a problem for isBlank when we only parse
11594 * a sequence of blank chars and the next one is
11595 * not available to check against '<' presence.
11596 * - tries to homogenize the differences in SAX
11597 * callbacks between the push and pull versions
11598 * of the parser.
11599 */
11600 if ((ctxt->inputNr == 1) &&
11601 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11602 if (!terminate) {
11603 if (ctxt->progressive) {
11604 if ((lastlt == NULL) ||
11605 (ctxt->input->cur > lastlt))
11606 goto done;
11607 } else if (xmlParseLookupSequence(ctxt,
11608 '<', 0, 0) < 0) {
11609 goto done;
11610 }
11611 }
11612 }
11613 ctxt->checkIndex = 0;
11614 xmlParseCharData(ctxt, 0);
11615 }
11616 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11617 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11618 "detected an error in element content\n");
11619 xmlHaltParser(ctxt);
11620 break;
11621 }
11622 break;
11623 }
11624 case XML_PARSER_END_TAG:
11625 if (avail < 2)
11626 goto done;
11627 if (!terminate) {
11628 if (ctxt->progressive) {
11629 /* > can be found unescaped in attribute values */
11630 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11631 goto done;
11632 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11633 goto done;
11634 }
11635 }
11636 if (ctxt->sax2) {
11637 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11638 nameNsPop(ctxt);
11639 }
11640 #ifdef LIBXML_SAX1_ENABLED
11641 else
11642 xmlParseEndTag1(ctxt, 0);
11643 #endif /* LIBXML_SAX1_ENABLED */
11644 if (ctxt->instate == XML_PARSER_EOF) {
11645 /* Nothing */
11646 } else if (ctxt->nameNr == 0) {
11647 ctxt->instate = XML_PARSER_EPILOG;
11648 } else {
11649 ctxt->instate = XML_PARSER_CONTENT;
11650 }
11651 break;
11652 case XML_PARSER_CDATA_SECTION: {
11653 /*
11654 * The Push mode need to have the SAX callback for
11655 * cdataBlock merge back contiguous callbacks.
11656 */
11657 int base;
11658
11659 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11660 if (base < 0) {
11661 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11662 int tmp;
11663
11664 tmp = xmlCheckCdataPush(ctxt->input->cur,
11665 XML_PARSER_BIG_BUFFER_SIZE, 0);
11666 if (tmp < 0) {
11667 tmp = -tmp;
11668 ctxt->input->cur += tmp;
11669 goto encoding_error;
11670 }
11671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11672 if (ctxt->sax->cdataBlock != NULL)
11673 ctxt->sax->cdataBlock(ctxt->userData,
11674 ctxt->input->cur, tmp);
11675 else if (ctxt->sax->characters != NULL)
11676 ctxt->sax->characters(ctxt->userData,
11677 ctxt->input->cur, tmp);
11678 }
11679 if (ctxt->instate == XML_PARSER_EOF)
11680 goto done;
11681 SKIPL(tmp);
11682 ctxt->checkIndex = 0;
11683 }
11684 goto done;
11685 } else {
11686 int tmp;
11687
11688 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11689 if ((tmp < 0) || (tmp != base)) {
11690 tmp = -tmp;
11691 ctxt->input->cur += tmp;
11692 goto encoding_error;
11693 }
11694 if ((ctxt->sax != NULL) && (base == 0) &&
11695 (ctxt->sax->cdataBlock != NULL) &&
11696 (!ctxt->disableSAX)) {
11697 /*
11698 * Special case to provide identical behaviour
11699 * between pull and push parsers on empty CDATA
11700 * sections
11701 */
11702 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11703 (!strncmp((const char *)&ctxt->input->cur[-9],
11704 "<![CDATA[", 9)))
11705 ctxt->sax->cdataBlock(ctxt->userData,
11706 BAD_CAST "", 0);
11707 } else if ((ctxt->sax != NULL) && (base > 0) &&
11708 (!ctxt->disableSAX)) {
11709 if (ctxt->sax->cdataBlock != NULL)
11710 ctxt->sax->cdataBlock(ctxt->userData,
11711 ctxt->input->cur, base);
11712 else if (ctxt->sax->characters != NULL)
11713 ctxt->sax->characters(ctxt->userData,
11714 ctxt->input->cur, base);
11715 }
11716 if (ctxt->instate == XML_PARSER_EOF)
11717 goto done;
11718 SKIPL(base + 3);
11719 ctxt->checkIndex = 0;
11720 ctxt->instate = XML_PARSER_CONTENT;
11721 #ifdef DEBUG_PUSH
11722 xmlGenericError(xmlGenericErrorContext,
11723 "PP: entering CONTENT\n");
11724 #endif
11725 }
11726 break;
11727 }
11728 case XML_PARSER_MISC:
11729 SKIP_BLANKS;
11730 if (ctxt->input->buf == NULL)
11731 avail = ctxt->input->length -
11732 (ctxt->input->cur - ctxt->input->base);
11733 else
11734 avail = xmlBufUse(ctxt->input->buf->buffer) -
11735 (ctxt->input->cur - ctxt->input->base);
11736 if (avail < 2)
11737 goto done;
11738 cur = ctxt->input->cur[0];
11739 next = ctxt->input->cur[1];
11740 if ((cur == '<') && (next == '?')) {
11741 if ((!terminate) &&
11742 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11743 ctxt->progressive = XML_PARSER_PI;
11744 goto done;
11745 }
11746 #ifdef DEBUG_PUSH
11747 xmlGenericError(xmlGenericErrorContext,
11748 "PP: Parsing PI\n");
11749 #endif
11750 xmlParsePI(ctxt);
11751 if (ctxt->instate == XML_PARSER_EOF)
11752 goto done;
11753 ctxt->instate = XML_PARSER_MISC;
11754 ctxt->progressive = 1;
11755 ctxt->checkIndex = 0;
11756 } else if ((cur == '<') && (next == '!') &&
11757 (ctxt->input->cur[2] == '-') &&
11758 (ctxt->input->cur[3] == '-')) {
11759 if ((!terminate) &&
11760 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11761 ctxt->progressive = XML_PARSER_COMMENT;
11762 goto done;
11763 }
11764 #ifdef DEBUG_PUSH
11765 xmlGenericError(xmlGenericErrorContext,
11766 "PP: Parsing Comment\n");
11767 #endif
11768 xmlParseComment(ctxt);
11769 if (ctxt->instate == XML_PARSER_EOF)
11770 goto done;
11771 ctxt->instate = XML_PARSER_MISC;
11772 ctxt->progressive = 1;
11773 ctxt->checkIndex = 0;
11774 } else if ((cur == '<') && (next == '!') &&
11775 (ctxt->input->cur[2] == 'D') &&
11776 (ctxt->input->cur[3] == 'O') &&
11777 (ctxt->input->cur[4] == 'C') &&
11778 (ctxt->input->cur[5] == 'T') &&
11779 (ctxt->input->cur[6] == 'Y') &&
11780 (ctxt->input->cur[7] == 'P') &&
11781 (ctxt->input->cur[8] == 'E')) {
11782 if ((!terminate) &&
11783 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11784 ctxt->progressive = XML_PARSER_DTD;
11785 goto done;
11786 }
11787 #ifdef DEBUG_PUSH
11788 xmlGenericError(xmlGenericErrorContext,
11789 "PP: Parsing internal subset\n");
11790 #endif
11791 ctxt->inSubset = 1;
11792 ctxt->progressive = 0;
11793 ctxt->checkIndex = 0;
11794 xmlParseDocTypeDecl(ctxt);
11795 if (ctxt->instate == XML_PARSER_EOF)
11796 goto done;
11797 if (RAW == '[') {
11798 ctxt->instate = XML_PARSER_DTD;
11799 #ifdef DEBUG_PUSH
11800 xmlGenericError(xmlGenericErrorContext,
11801 "PP: entering DTD\n");
11802 #endif
11803 } else {
11804 /*
11805 * Create and update the external subset.
11806 */
11807 ctxt->inSubset = 2;
11808 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11809 (ctxt->sax->externalSubset != NULL))
11810 ctxt->sax->externalSubset(ctxt->userData,
11811 ctxt->intSubName, ctxt->extSubSystem,
11812 ctxt->extSubURI);
11813 ctxt->inSubset = 0;
11814 xmlCleanSpecialAttr(ctxt);
11815 ctxt->instate = XML_PARSER_PROLOG;
11816 #ifdef DEBUG_PUSH
11817 xmlGenericError(xmlGenericErrorContext,
11818 "PP: entering PROLOG\n");
11819 #endif
11820 }
11821 } else if ((cur == '<') && (next == '!') &&
11822 (avail < 9)) {
11823 goto done;
11824 } else {
11825 ctxt->instate = XML_PARSER_START_TAG;
11826 ctxt->progressive = XML_PARSER_START_TAG;
11827 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11828 #ifdef DEBUG_PUSH
11829 xmlGenericError(xmlGenericErrorContext,
11830 "PP: entering START_TAG\n");
11831 #endif
11832 }
11833 break;
11834 case XML_PARSER_PROLOG:
11835 SKIP_BLANKS;
11836 if (ctxt->input->buf == NULL)
11837 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11838 else
11839 avail = xmlBufUse(ctxt->input->buf->buffer) -
11840 (ctxt->input->cur - ctxt->input->base);
11841 if (avail < 2)
11842 goto done;
11843 cur = ctxt->input->cur[0];
11844 next = ctxt->input->cur[1];
11845 if ((cur == '<') && (next == '?')) {
11846 if ((!terminate) &&
11847 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11848 ctxt->progressive = XML_PARSER_PI;
11849 goto done;
11850 }
11851 #ifdef DEBUG_PUSH
11852 xmlGenericError(xmlGenericErrorContext,
11853 "PP: Parsing PI\n");
11854 #endif
11855 xmlParsePI(ctxt);
11856 if (ctxt->instate == XML_PARSER_EOF)
11857 goto done;
11858 ctxt->instate = XML_PARSER_PROLOG;
11859 ctxt->progressive = 1;
11860 } else if ((cur == '<') && (next == '!') &&
11861 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11862 if ((!terminate) &&
11863 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11864 ctxt->progressive = XML_PARSER_COMMENT;
11865 goto done;
11866 }
11867 #ifdef DEBUG_PUSH
11868 xmlGenericError(xmlGenericErrorContext,
11869 "PP: Parsing Comment\n");
11870 #endif
11871 xmlParseComment(ctxt);
11872 if (ctxt->instate == XML_PARSER_EOF)
11873 goto done;
11874 ctxt->instate = XML_PARSER_PROLOG;
11875 ctxt->progressive = 1;
11876 } else if ((cur == '<') && (next == '!') &&
11877 (avail < 4)) {
11878 goto done;
11879 } else {
11880 ctxt->instate = XML_PARSER_START_TAG;
11881 if (ctxt->progressive == 0)
11882 ctxt->progressive = XML_PARSER_START_TAG;
11883 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11884 #ifdef DEBUG_PUSH
11885 xmlGenericError(xmlGenericErrorContext,
11886 "PP: entering START_TAG\n");
11887 #endif
11888 }
11889 break;
11890 case XML_PARSER_EPILOG:
11891 SKIP_BLANKS;
11892 if (ctxt->input->buf == NULL)
11893 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11894 else
11895 avail = xmlBufUse(ctxt->input->buf->buffer) -
11896 (ctxt->input->cur - ctxt->input->base);
11897 if (avail < 2)
11898 goto done;
11899 cur = ctxt->input->cur[0];
11900 next = ctxt->input->cur[1];
11901 if ((cur == '<') && (next == '?')) {
11902 if ((!terminate) &&
11903 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11904 ctxt->progressive = XML_PARSER_PI;
11905 goto done;
11906 }
11907 #ifdef DEBUG_PUSH
11908 xmlGenericError(xmlGenericErrorContext,
11909 "PP: Parsing PI\n");
11910 #endif
11911 xmlParsePI(ctxt);
11912 if (ctxt->instate == XML_PARSER_EOF)
11913 goto done;
11914 ctxt->instate = XML_PARSER_EPILOG;
11915 ctxt->progressive = 1;
11916 } else if ((cur == '<') && (next == '!') &&
11917 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11918 if ((!terminate) &&
11919 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11920 ctxt->progressive = XML_PARSER_COMMENT;
11921 goto done;
11922 }
11923 #ifdef DEBUG_PUSH
11924 xmlGenericError(xmlGenericErrorContext,
11925 "PP: Parsing Comment\n");
11926 #endif
11927 xmlParseComment(ctxt);
11928 if (ctxt->instate == XML_PARSER_EOF)
11929 goto done;
11930 ctxt->instate = XML_PARSER_EPILOG;
11931 ctxt->progressive = 1;
11932 } else if ((cur == '<') && (next == '!') &&
11933 (avail < 4)) {
11934 goto done;
11935 } else {
11936 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11937 xmlHaltParser(ctxt);
11938 #ifdef DEBUG_PUSH
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: entering EOF\n");
11941 #endif
11942 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11943 ctxt->sax->endDocument(ctxt->userData);
11944 goto done;
11945 }
11946 break;
11947 case XML_PARSER_DTD: {
11948 /*
11949 * Sorry but progressive parsing of the internal subset
11950 * is not expected to be supported. We first check that
11951 * the full content of the internal subset is available and
11952 * the parsing is launched only at that point.
11953 * Internal subset ends up with "']' S? '>'" in an unescaped
11954 * section and not in a ']]>' sequence which are conditional
11955 * sections (whoever argued to keep that crap in XML deserve
11956 * a place in hell !).
11957 */
11958 int base, i;
11959 xmlChar *buf;
11960 xmlChar quote = 0;
11961 size_t use;
11962
11963 base = ctxt->input->cur - ctxt->input->base;
11964 if (base < 0) return(0);
11965 if (ctxt->checkIndex > base)
11966 base = ctxt->checkIndex;
11967 buf = xmlBufContent(ctxt->input->buf->buffer);
11968 use = xmlBufUse(ctxt->input->buf->buffer);
11969 for (;(unsigned int) base < use; base++) {
11970 if (quote != 0) {
11971 if (buf[base] == quote)
11972 quote = 0;
11973 continue;
11974 }
11975 if ((quote == 0) && (buf[base] == '<')) {
11976 int found = 0;
11977 /* special handling of comments */
11978 if (((unsigned int) base + 4 < use) &&
11979 (buf[base + 1] == '!') &&
11980 (buf[base + 2] == '-') &&
11981 (buf[base + 3] == '-')) {
11982 for (;(unsigned int) base + 3 < use; base++) {
11983 if ((buf[base] == '-') &&
11984 (buf[base + 1] == '-') &&
11985 (buf[base + 2] == '>')) {
11986 found = 1;
11987 base += 2;
11988 break;
11989 }
11990 }
11991 if (!found) {
11992 #if 0
11993 fprintf(stderr, "unfinished comment\n");
11994 #endif
11995 break; /* for */
11996 }
11997 continue;
11998 }
11999 }
12000 if (buf[base] == '"') {
12001 quote = '"';
12002 continue;
12003 }
12004 if (buf[base] == '\'') {
12005 quote = '\'';
12006 continue;
12007 }
12008 if (buf[base] == ']') {
12009 #if 0
12010 fprintf(stderr, "%c%c%c%c: ", buf[base],
12011 buf[base + 1], buf[base + 2], buf[base + 3]);
12012 #endif
12013 if ((unsigned int) base +1 >= use)
12014 break;
12015 if (buf[base + 1] == ']') {
12016 /* conditional crap, skip both ']' ! */
12017 base++;
12018 continue;
12019 }
12020 for (i = 1; (unsigned int) base + i < use; i++) {
12021 if (buf[base + i] == '>') {
12022 #if 0
12023 fprintf(stderr, "found\n");
12024 #endif
12025 goto found_end_int_subset;
12026 }
12027 if (!IS_BLANK_CH(buf[base + i])) {
12028 #if 0
12029 fprintf(stderr, "not found\n");
12030 #endif
12031 goto not_end_of_int_subset;
12032 }
12033 }
12034 #if 0
12035 fprintf(stderr, "end of stream\n");
12036 #endif
12037 break;
12038
12039 }
12040 not_end_of_int_subset:
12041 continue; /* for */
12042 }
12043 /*
12044 * We didn't found the end of the Internal subset
12045 */
12046 if (quote == 0)
12047 ctxt->checkIndex = base;
12048 else
12049 ctxt->checkIndex = 0;
12050 #ifdef DEBUG_PUSH
12051 if (next == 0)
12052 xmlGenericError(xmlGenericErrorContext,
12053 "PP: lookup of int subset end filed\n");
12054 #endif
12055 goto done;
12056
12057 found_end_int_subset:
12058 ctxt->checkIndex = 0;
12059 xmlParseInternalSubset(ctxt);
12060 if (ctxt->instate == XML_PARSER_EOF)
12061 goto done;
12062 ctxt->inSubset = 2;
12063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12064 (ctxt->sax->externalSubset != NULL))
12065 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12066 ctxt->extSubSystem, ctxt->extSubURI);
12067 ctxt->inSubset = 0;
12068 xmlCleanSpecialAttr(ctxt);
12069 if (ctxt->instate == XML_PARSER_EOF)
12070 goto done;
12071 ctxt->instate = XML_PARSER_PROLOG;
12072 ctxt->checkIndex = 0;
12073 #ifdef DEBUG_PUSH
12074 xmlGenericError(xmlGenericErrorContext,
12075 "PP: entering PROLOG\n");
12076 #endif
12077 break;
12078 }
12079 case XML_PARSER_COMMENT:
12080 xmlGenericError(xmlGenericErrorContext,
12081 "PP: internal error, state == COMMENT\n");
12082 ctxt->instate = XML_PARSER_CONTENT;
12083 #ifdef DEBUG_PUSH
12084 xmlGenericError(xmlGenericErrorContext,
12085 "PP: entering CONTENT\n");
12086 #endif
12087 break;
12088 case XML_PARSER_IGNORE:
12089 xmlGenericError(xmlGenericErrorContext,
12090 "PP: internal error, state == IGNORE");
12091 ctxt->instate = XML_PARSER_DTD;
12092 #ifdef DEBUG_PUSH
12093 xmlGenericError(xmlGenericErrorContext,
12094 "PP: entering DTD\n");
12095 #endif
12096 break;
12097 case XML_PARSER_PI:
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: internal error, state == PI\n");
12100 ctxt->instate = XML_PARSER_CONTENT;
12101 #ifdef DEBUG_PUSH
12102 xmlGenericError(xmlGenericErrorContext,
12103 "PP: entering CONTENT\n");
12104 #endif
12105 break;
12106 case XML_PARSER_ENTITY_DECL:
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: internal error, state == ENTITY_DECL\n");
12109 ctxt->instate = XML_PARSER_DTD;
12110 #ifdef DEBUG_PUSH
12111 xmlGenericError(xmlGenericErrorContext,
12112 "PP: entering DTD\n");
12113 #endif
12114 break;
12115 case XML_PARSER_ENTITY_VALUE:
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: internal error, state == ENTITY_VALUE\n");
12118 ctxt->instate = XML_PARSER_CONTENT;
12119 #ifdef DEBUG_PUSH
12120 xmlGenericError(xmlGenericErrorContext,
12121 "PP: entering DTD\n");
12122 #endif
12123 break;
12124 case XML_PARSER_ATTRIBUTE_VALUE:
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12127 ctxt->instate = XML_PARSER_START_TAG;
12128 #ifdef DEBUG_PUSH
12129 xmlGenericError(xmlGenericErrorContext,
12130 "PP: entering START_TAG\n");
12131 #endif
12132 break;
12133 case XML_PARSER_SYSTEM_LITERAL:
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: internal error, state == SYSTEM_LITERAL\n");
12136 ctxt->instate = XML_PARSER_START_TAG;
12137 #ifdef DEBUG_PUSH
12138 xmlGenericError(xmlGenericErrorContext,
12139 "PP: entering START_TAG\n");
12140 #endif
12141 break;
12142 case XML_PARSER_PUBLIC_LITERAL:
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: internal error, state == PUBLIC_LITERAL\n");
12145 ctxt->instate = XML_PARSER_START_TAG;
12146 #ifdef DEBUG_PUSH
12147 xmlGenericError(xmlGenericErrorContext,
12148 "PP: entering START_TAG\n");
12149 #endif
12150 break;
12151 }
12152 }
12153 done:
12154 #ifdef DEBUG_PUSH
12155 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12156 #endif
12157 return(ret);
12158 encoding_error:
12159 {
12160 char buffer[150];
12161
12162 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12163 ctxt->input->cur[0], ctxt->input->cur[1],
12164 ctxt->input->cur[2], ctxt->input->cur[3]);
12165 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12166 "Input is not proper UTF-8, indicate encoding !\n%s",
12167 BAD_CAST buffer, NULL);
12168 }
12169 return(0);
12170 }
12171
12172 /**
12173 * xmlParseCheckTransition:
12174 * @ctxt: an XML parser context
12175 * @chunk: a char array
12176 * @size: the size in byte of the chunk
12177 *
12178 * Check depending on the current parser state if the chunk given must be
12179 * processed immediately or one need more data to advance on parsing.
12180 *
12181 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12182 */
12183 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12184 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12185 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12186 return(-1);
12187 if (ctxt->instate == XML_PARSER_START_TAG) {
12188 if (memchr(chunk, '>', size) != NULL)
12189 return(1);
12190 return(0);
12191 }
12192 if (ctxt->progressive == XML_PARSER_COMMENT) {
12193 if (memchr(chunk, '>', size) != NULL)
12194 return(1);
12195 return(0);
12196 }
12197 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12198 if (memchr(chunk, '>', size) != NULL)
12199 return(1);
12200 return(0);
12201 }
12202 if (ctxt->progressive == XML_PARSER_PI) {
12203 if (memchr(chunk, '>', size) != NULL)
12204 return(1);
12205 return(0);
12206 }
12207 if (ctxt->instate == XML_PARSER_END_TAG) {
12208 if (memchr(chunk, '>', size) != NULL)
12209 return(1);
12210 return(0);
12211 }
12212 if ((ctxt->progressive == XML_PARSER_DTD) ||
12213 (ctxt->instate == XML_PARSER_DTD)) {
12214 if (memchr(chunk, '>', size) != NULL)
12215 return(1);
12216 return(0);
12217 }
12218 return(1);
12219 }
12220
12221 /**
12222 * xmlParseChunk:
12223 * @ctxt: an XML parser context
12224 * @chunk: an char array
12225 * @size: the size in byte of the chunk
12226 * @terminate: last chunk indicator
12227 *
12228 * Parse a Chunk of memory
12229 *
12230 * Returns zero if no error, the xmlParserErrors otherwise.
12231 */
12232 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12233 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12234 int terminate) {
12235 int end_in_lf = 0;
12236 int remain = 0;
12237 size_t old_avail = 0;
12238 size_t avail = 0;
12239
12240 if (ctxt == NULL)
12241 return(XML_ERR_INTERNAL_ERROR);
12242 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12243 return(ctxt->errNo);
12244 if (ctxt->instate == XML_PARSER_EOF)
12245 return(-1);
12246 if (ctxt->instate == XML_PARSER_START)
12247 xmlDetectSAX2(ctxt);
12248 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12249 (chunk[size - 1] == '\r')) {
12250 end_in_lf = 1;
12251 size--;
12252 }
12253
12254 xmldecl_done:
12255
12256 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12257 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12258 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12259 size_t cur = ctxt->input->cur - ctxt->input->base;
12260 int res;
12261
12262 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12263 /*
12264 * Specific handling if we autodetected an encoding, we should not
12265 * push more than the first line ... which depend on the encoding
12266 * And only push the rest once the final encoding was detected
12267 */
12268 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12269 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12270 unsigned int len = 45;
12271
12272 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12273 BAD_CAST "UTF-16")) ||
12274 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12275 BAD_CAST "UTF16")))
12276 len = 90;
12277 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278 BAD_CAST "UCS-4")) ||
12279 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280 BAD_CAST "UCS4")))
12281 len = 180;
12282
12283 if (ctxt->input->buf->rawconsumed < len)
12284 len -= ctxt->input->buf->rawconsumed;
12285
12286 /*
12287 * Change size for reading the initial declaration only
12288 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12289 * will blindly copy extra bytes from memory.
12290 */
12291 if ((unsigned int) size > len) {
12292 remain = size - len;
12293 size = len;
12294 } else {
12295 remain = 0;
12296 }
12297 }
12298 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12299 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12300 if (res < 0) {
12301 ctxt->errNo = XML_PARSER_EOF;
12302 xmlHaltParser(ctxt);
12303 return (XML_PARSER_EOF);
12304 }
12305 #ifdef DEBUG_PUSH
12306 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12307 #endif
12308
12309 } else if (ctxt->instate != XML_PARSER_EOF) {
12310 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12311 xmlParserInputBufferPtr in = ctxt->input->buf;
12312 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12313 (in->raw != NULL)) {
12314 int nbchars;
12315 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12316 size_t current = ctxt->input->cur - ctxt->input->base;
12317
12318 nbchars = xmlCharEncInput(in, terminate);
12319 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12320 if (nbchars < 0) {
12321 /* TODO 2.6.0 */
12322 xmlGenericError(xmlGenericErrorContext,
12323 "xmlParseChunk: encoder error\n");
12324 xmlHaltParser(ctxt);
12325 return(XML_ERR_INVALID_ENCODING);
12326 }
12327 }
12328 }
12329 }
12330 if (remain != 0) {
12331 xmlParseTryOrFinish(ctxt, 0);
12332 } else {
12333 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12334 avail = xmlBufUse(ctxt->input->buf->buffer);
12335 /*
12336 * Depending on the current state it may not be such
12337 * a good idea to try parsing if there is nothing in the chunk
12338 * which would be worth doing a parser state transition and we
12339 * need to wait for more data
12340 */
12341 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12342 (old_avail == 0) || (avail == 0) ||
12343 (xmlParseCheckTransition(ctxt,
12344 (const char *)&ctxt->input->base[old_avail],
12345 avail - old_avail)))
12346 xmlParseTryOrFinish(ctxt, terminate);
12347 }
12348 if (ctxt->instate == XML_PARSER_EOF)
12349 return(ctxt->errNo);
12350
12351 if ((ctxt->input != NULL) &&
12352 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12353 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12354 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12355 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12356 xmlHaltParser(ctxt);
12357 }
12358 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12359 return(ctxt->errNo);
12360
12361 if (remain != 0) {
12362 chunk += size;
12363 size = remain;
12364 remain = 0;
12365 goto xmldecl_done;
12366 }
12367 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12368 (ctxt->input->buf != NULL)) {
12369 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12370 ctxt->input);
12371 size_t current = ctxt->input->cur - ctxt->input->base;
12372
12373 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12374
12375 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12376 base, current);
12377 }
12378 if (terminate) {
12379 /*
12380 * Check for termination
12381 */
12382 int cur_avail = 0;
12383
12384 if (ctxt->input != NULL) {
12385 if (ctxt->input->buf == NULL)
12386 cur_avail = ctxt->input->length -
12387 (ctxt->input->cur - ctxt->input->base);
12388 else
12389 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12390 (ctxt->input->cur - ctxt->input->base);
12391 }
12392
12393 if ((ctxt->instate != XML_PARSER_EOF) &&
12394 (ctxt->instate != XML_PARSER_EPILOG)) {
12395 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12396 }
12397 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12398 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12399 }
12400 if (ctxt->instate != XML_PARSER_EOF) {
12401 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12402 ctxt->sax->endDocument(ctxt->userData);
12403 }
12404 ctxt->instate = XML_PARSER_EOF;
12405 }
12406 if (ctxt->wellFormed == 0)
12407 return((xmlParserErrors) ctxt->errNo);
12408 else
12409 return(0);
12410 }
12411
12412 /************************************************************************
12413 * *
12414 * I/O front end functions to the parser *
12415 * *
12416 ************************************************************************/
12417
12418 /**
12419 * xmlCreatePushParserCtxt:
12420 * @sax: a SAX handler
12421 * @user_data: The user data returned on SAX callbacks
12422 * @chunk: a pointer to an array of chars
12423 * @size: number of chars in the array
12424 * @filename: an optional file name or URI
12425 *
12426 * Create a parser context for using the XML parser in push mode.
12427 * If @buffer and @size are non-NULL, the data is used to detect
12428 * the encoding. The remaining characters will be parsed so they
12429 * don't need to be fed in again through xmlParseChunk.
12430 * To allow content encoding detection, @size should be >= 4
12431 * The value of @filename is used for fetching external entities
12432 * and error/warning reports.
12433 *
12434 * Returns the new parser context or NULL
12435 */
12436
12437 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12438 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12439 const char *chunk, int size, const char *filename) {
12440 xmlParserCtxtPtr ctxt;
12441 xmlParserInputPtr inputStream;
12442 xmlParserInputBufferPtr buf;
12443 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12444
12445 /*
12446 * plug some encoding conversion routines
12447 */
12448 if ((chunk != NULL) && (size >= 4))
12449 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12450
12451 buf = xmlAllocParserInputBuffer(enc);
12452 if (buf == NULL) return(NULL);
12453
12454 ctxt = xmlNewParserCtxt();
12455 if (ctxt == NULL) {
12456 xmlErrMemory(NULL, "creating parser: out of memory\n");
12457 xmlFreeParserInputBuffer(buf);
12458 return(NULL);
12459 }
12460 ctxt->dictNames = 1;
12461 if (sax != NULL) {
12462 #ifdef LIBXML_SAX1_ENABLED
12463 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12464 #endif /* LIBXML_SAX1_ENABLED */
12465 xmlFree(ctxt->sax);
12466 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12467 if (ctxt->sax == NULL) {
12468 xmlErrMemory(ctxt, NULL);
12469 xmlFreeParserInputBuffer(buf);
12470 xmlFreeParserCtxt(ctxt);
12471 return(NULL);
12472 }
12473 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12474 if (sax->initialized == XML_SAX2_MAGIC)
12475 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12476 else
12477 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12478 if (user_data != NULL)
12479 ctxt->userData = user_data;
12480 }
12481 if (filename == NULL) {
12482 ctxt->directory = NULL;
12483 } else {
12484 ctxt->directory = xmlParserGetDirectory(filename);
12485 }
12486
12487 inputStream = xmlNewInputStream(ctxt);
12488 if (inputStream == NULL) {
12489 xmlFreeParserCtxt(ctxt);
12490 xmlFreeParserInputBuffer(buf);
12491 return(NULL);
12492 }
12493
12494 if (filename == NULL)
12495 inputStream->filename = NULL;
12496 else {
12497 inputStream->filename = (char *)
12498 xmlCanonicPath((const xmlChar *) filename);
12499 if (inputStream->filename == NULL) {
12500 xmlFreeParserCtxt(ctxt);
12501 xmlFreeParserInputBuffer(buf);
12502 return(NULL);
12503 }
12504 }
12505 inputStream->buf = buf;
12506 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12507 inputPush(ctxt, inputStream);
12508
12509 /*
12510 * If the caller didn't provide an initial 'chunk' for determining
12511 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12512 * that it can be automatically determined later
12513 */
12514 if ((size == 0) || (chunk == NULL)) {
12515 ctxt->charset = XML_CHAR_ENCODING_NONE;
12516 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12517 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12518 size_t cur = ctxt->input->cur - ctxt->input->base;
12519
12520 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12521
12522 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12523 #ifdef DEBUG_PUSH
12524 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12525 #endif
12526 }
12527
12528 if (enc != XML_CHAR_ENCODING_NONE) {
12529 xmlSwitchEncoding(ctxt, enc);
12530 }
12531
12532 return(ctxt);
12533 }
12534 #endif /* LIBXML_PUSH_ENABLED */
12535
12536 /**
12537 * xmlHaltParser:
12538 * @ctxt: an XML parser context
12539 *
12540 * Blocks further parser processing don't override error
12541 * for internal use
12542 */
12543 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12544 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12545 if (ctxt == NULL)
12546 return;
12547 ctxt->instate = XML_PARSER_EOF;
12548 ctxt->disableSAX = 1;
12549 while (ctxt->inputNr > 1)
12550 xmlFreeInputStream(inputPop(ctxt));
12551 if (ctxt->input != NULL) {
12552 /*
12553 * in case there was a specific allocation deallocate before
12554 * overriding base
12555 */
12556 if (ctxt->input->free != NULL) {
12557 ctxt->input->free((xmlChar *) ctxt->input->base);
12558 ctxt->input->free = NULL;
12559 }
12560 if (ctxt->input->buf != NULL) {
12561 xmlFreeParserInputBuffer(ctxt->input->buf);
12562 ctxt->input->buf = NULL;
12563 }
12564 ctxt->input->cur = BAD_CAST"";
12565 ctxt->input->length = 0;
12566 ctxt->input->base = ctxt->input->cur;
12567 ctxt->input->end = ctxt->input->cur;
12568 }
12569 }
12570
12571 /**
12572 * xmlStopParser:
12573 * @ctxt: an XML parser context
12574 *
12575 * Blocks further parser processing
12576 */
12577 void
xmlStopParser(xmlParserCtxtPtr ctxt)12578 xmlStopParser(xmlParserCtxtPtr ctxt) {
12579 if (ctxt == NULL)
12580 return;
12581 xmlHaltParser(ctxt);
12582 ctxt->errNo = XML_ERR_USER_STOP;
12583 }
12584
12585 /**
12586 * xmlCreateIOParserCtxt:
12587 * @sax: a SAX handler
12588 * @user_data: The user data returned on SAX callbacks
12589 * @ioread: an I/O read function
12590 * @ioclose: an I/O close function
12591 * @ioctx: an I/O handler
12592 * @enc: the charset encoding if known
12593 *
12594 * Create a parser context for using the XML parser with an existing
12595 * I/O stream
12596 *
12597 * Returns the new parser context or NULL
12598 */
12599 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12600 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12601 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12602 void *ioctx, xmlCharEncoding enc) {
12603 xmlParserCtxtPtr ctxt;
12604 xmlParserInputPtr inputStream;
12605 xmlParserInputBufferPtr buf;
12606
12607 if (ioread == NULL) return(NULL);
12608
12609 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12610 if (buf == NULL) {
12611 if (ioclose != NULL)
12612 ioclose(ioctx);
12613 return (NULL);
12614 }
12615
12616 ctxt = xmlNewParserCtxt();
12617 if (ctxt == NULL) {
12618 xmlFreeParserInputBuffer(buf);
12619 return(NULL);
12620 }
12621 if (sax != NULL) {
12622 #ifdef LIBXML_SAX1_ENABLED
12623 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12624 #endif /* LIBXML_SAX1_ENABLED */
12625 xmlFree(ctxt->sax);
12626 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12627 if (ctxt->sax == NULL) {
12628 xmlErrMemory(ctxt, NULL);
12629 xmlFreeParserCtxt(ctxt);
12630 return(NULL);
12631 }
12632 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12633 if (sax->initialized == XML_SAX2_MAGIC)
12634 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12635 else
12636 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12637 if (user_data != NULL)
12638 ctxt->userData = user_data;
12639 }
12640
12641 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12642 if (inputStream == NULL) {
12643 xmlFreeParserCtxt(ctxt);
12644 return(NULL);
12645 }
12646 inputPush(ctxt, inputStream);
12647
12648 return(ctxt);
12649 }
12650
12651 #ifdef LIBXML_VALID_ENABLED
12652 /************************************************************************
12653 * *
12654 * Front ends when parsing a DTD *
12655 * *
12656 ************************************************************************/
12657
12658 /**
12659 * xmlIOParseDTD:
12660 * @sax: the SAX handler block or NULL
12661 * @input: an Input Buffer
12662 * @enc: the charset encoding if known
12663 *
12664 * Load and parse a DTD
12665 *
12666 * Returns the resulting xmlDtdPtr or NULL in case of error.
12667 * @input will be freed by the function in any case.
12668 */
12669
12670 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12671 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12672 xmlCharEncoding enc) {
12673 xmlDtdPtr ret = NULL;
12674 xmlParserCtxtPtr ctxt;
12675 xmlParserInputPtr pinput = NULL;
12676 xmlChar start[4];
12677
12678 if (input == NULL)
12679 return(NULL);
12680
12681 ctxt = xmlNewParserCtxt();
12682 if (ctxt == NULL) {
12683 xmlFreeParserInputBuffer(input);
12684 return(NULL);
12685 }
12686
12687 /* We are loading a DTD */
12688 ctxt->options |= XML_PARSE_DTDLOAD;
12689
12690 /*
12691 * Set-up the SAX context
12692 */
12693 if (sax != NULL) {
12694 if (ctxt->sax != NULL)
12695 xmlFree(ctxt->sax);
12696 ctxt->sax = sax;
12697 ctxt->userData = ctxt;
12698 }
12699 xmlDetectSAX2(ctxt);
12700
12701 /*
12702 * generate a parser input from the I/O handler
12703 */
12704
12705 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12706 if (pinput == NULL) {
12707 if (sax != NULL) ctxt->sax = NULL;
12708 xmlFreeParserInputBuffer(input);
12709 xmlFreeParserCtxt(ctxt);
12710 return(NULL);
12711 }
12712
12713 /*
12714 * plug some encoding conversion routines here.
12715 */
12716 if (xmlPushInput(ctxt, pinput) < 0) {
12717 if (sax != NULL) ctxt->sax = NULL;
12718 xmlFreeParserCtxt(ctxt);
12719 return(NULL);
12720 }
12721 if (enc != XML_CHAR_ENCODING_NONE) {
12722 xmlSwitchEncoding(ctxt, enc);
12723 }
12724
12725 pinput->filename = NULL;
12726 pinput->line = 1;
12727 pinput->col = 1;
12728 pinput->base = ctxt->input->cur;
12729 pinput->cur = ctxt->input->cur;
12730 pinput->free = NULL;
12731
12732 /*
12733 * let's parse that entity knowing it's an external subset.
12734 */
12735 ctxt->inSubset = 2;
12736 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12737 if (ctxt->myDoc == NULL) {
12738 xmlErrMemory(ctxt, "New Doc failed");
12739 return(NULL);
12740 }
12741 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12742 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12743 BAD_CAST "none", BAD_CAST "none");
12744
12745 if ((enc == XML_CHAR_ENCODING_NONE) &&
12746 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12747 /*
12748 * Get the 4 first bytes and decode the charset
12749 * if enc != XML_CHAR_ENCODING_NONE
12750 * plug some encoding conversion routines.
12751 */
12752 start[0] = RAW;
12753 start[1] = NXT(1);
12754 start[2] = NXT(2);
12755 start[3] = NXT(3);
12756 enc = xmlDetectCharEncoding(start, 4);
12757 if (enc != XML_CHAR_ENCODING_NONE) {
12758 xmlSwitchEncoding(ctxt, enc);
12759 }
12760 }
12761
12762 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12763
12764 if (ctxt->myDoc != NULL) {
12765 if (ctxt->wellFormed) {
12766 ret = ctxt->myDoc->extSubset;
12767 ctxt->myDoc->extSubset = NULL;
12768 if (ret != NULL) {
12769 xmlNodePtr tmp;
12770
12771 ret->doc = NULL;
12772 tmp = ret->children;
12773 while (tmp != NULL) {
12774 tmp->doc = NULL;
12775 tmp = tmp->next;
12776 }
12777 }
12778 } else {
12779 ret = NULL;
12780 }
12781 xmlFreeDoc(ctxt->myDoc);
12782 ctxt->myDoc = NULL;
12783 }
12784 if (sax != NULL) ctxt->sax = NULL;
12785 xmlFreeParserCtxt(ctxt);
12786
12787 return(ret);
12788 }
12789
12790 /**
12791 * xmlSAXParseDTD:
12792 * @sax: the SAX handler block
12793 * @ExternalID: a NAME* containing the External ID of the DTD
12794 * @SystemID: a NAME* containing the URL to the DTD
12795 *
12796 * Load and parse an external subset.
12797 *
12798 * Returns the resulting xmlDtdPtr or NULL in case of error.
12799 */
12800
12801 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12802 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12803 const xmlChar *SystemID) {
12804 xmlDtdPtr ret = NULL;
12805 xmlParserCtxtPtr ctxt;
12806 xmlParserInputPtr input = NULL;
12807 xmlCharEncoding enc;
12808 xmlChar* systemIdCanonic;
12809
12810 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12811
12812 ctxt = xmlNewParserCtxt();
12813 if (ctxt == NULL) {
12814 return(NULL);
12815 }
12816
12817 /* We are loading a DTD */
12818 ctxt->options |= XML_PARSE_DTDLOAD;
12819
12820 /*
12821 * Set-up the SAX context
12822 */
12823 if (sax != NULL) {
12824 if (ctxt->sax != NULL)
12825 xmlFree(ctxt->sax);
12826 ctxt->sax = sax;
12827 ctxt->userData = ctxt;
12828 }
12829
12830 /*
12831 * Canonicalise the system ID
12832 */
12833 systemIdCanonic = xmlCanonicPath(SystemID);
12834 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12835 xmlFreeParserCtxt(ctxt);
12836 return(NULL);
12837 }
12838
12839 /*
12840 * Ask the Entity resolver to load the damn thing
12841 */
12842
12843 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12844 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12845 systemIdCanonic);
12846 if (input == NULL) {
12847 if (sax != NULL) ctxt->sax = NULL;
12848 xmlFreeParserCtxt(ctxt);
12849 if (systemIdCanonic != NULL)
12850 xmlFree(systemIdCanonic);
12851 return(NULL);
12852 }
12853
12854 /*
12855 * plug some encoding conversion routines here.
12856 */
12857 if (xmlPushInput(ctxt, input) < 0) {
12858 if (sax != NULL) ctxt->sax = NULL;
12859 xmlFreeParserCtxt(ctxt);
12860 if (systemIdCanonic != NULL)
12861 xmlFree(systemIdCanonic);
12862 return(NULL);
12863 }
12864 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12865 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12866 xmlSwitchEncoding(ctxt, enc);
12867 }
12868
12869 if (input->filename == NULL)
12870 input->filename = (char *) systemIdCanonic;
12871 else
12872 xmlFree(systemIdCanonic);
12873 input->line = 1;
12874 input->col = 1;
12875 input->base = ctxt->input->cur;
12876 input->cur = ctxt->input->cur;
12877 input->free = NULL;
12878
12879 /*
12880 * let's parse that entity knowing it's an external subset.
12881 */
12882 ctxt->inSubset = 2;
12883 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12884 if (ctxt->myDoc == NULL) {
12885 xmlErrMemory(ctxt, "New Doc failed");
12886 if (sax != NULL) ctxt->sax = NULL;
12887 xmlFreeParserCtxt(ctxt);
12888 return(NULL);
12889 }
12890 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12892 ExternalID, SystemID);
12893 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12894
12895 if (ctxt->myDoc != NULL) {
12896 if (ctxt->wellFormed) {
12897 ret = ctxt->myDoc->extSubset;
12898 ctxt->myDoc->extSubset = NULL;
12899 if (ret != NULL) {
12900 xmlNodePtr tmp;
12901
12902 ret->doc = NULL;
12903 tmp = ret->children;
12904 while (tmp != NULL) {
12905 tmp->doc = NULL;
12906 tmp = tmp->next;
12907 }
12908 }
12909 } else {
12910 ret = NULL;
12911 }
12912 xmlFreeDoc(ctxt->myDoc);
12913 ctxt->myDoc = NULL;
12914 }
12915 if (sax != NULL) ctxt->sax = NULL;
12916 xmlFreeParserCtxt(ctxt);
12917
12918 return(ret);
12919 }
12920
12921
12922 /**
12923 * xmlParseDTD:
12924 * @ExternalID: a NAME* containing the External ID of the DTD
12925 * @SystemID: a NAME* containing the URL to the DTD
12926 *
12927 * Load and parse an external subset.
12928 *
12929 * Returns the resulting xmlDtdPtr or NULL in case of error.
12930 */
12931
12932 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12933 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12934 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12935 }
12936 #endif /* LIBXML_VALID_ENABLED */
12937
12938 /************************************************************************
12939 * *
12940 * Front ends when parsing an Entity *
12941 * *
12942 ************************************************************************/
12943
12944 /**
12945 * xmlParseCtxtExternalEntity:
12946 * @ctx: the existing parsing context
12947 * @URL: the URL for the entity to load
12948 * @ID: the System ID for the entity to load
12949 * @lst: the return value for the set of parsed nodes
12950 *
12951 * Parse an external general entity within an existing parsing context
12952 * An external general parsed entity is well-formed if it matches the
12953 * production labeled extParsedEnt.
12954 *
12955 * [78] extParsedEnt ::= TextDecl? content
12956 *
12957 * Returns 0 if the entity is well formed, -1 in case of args problem and
12958 * the parser error code otherwise
12959 */
12960
12961 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12962 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12963 const xmlChar *ID, xmlNodePtr *lst) {
12964 void *userData;
12965
12966 if (ctx == NULL) return(-1);
12967 /*
12968 * If the user provided their own SAX callbacks, then reuse the
12969 * userData callback field, otherwise the expected setup in a
12970 * DOM builder is to have userData == ctxt
12971 */
12972 if (ctx->userData == ctx)
12973 userData = NULL;
12974 else
12975 userData = ctx->userData;
12976 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12977 userData, ctx->depth + 1,
12978 URL, ID, lst);
12979 }
12980
12981 /**
12982 * xmlParseExternalEntityPrivate:
12983 * @doc: the document the chunk pertains to
12984 * @oldctxt: the previous parser context if available
12985 * @sax: the SAX handler block (possibly NULL)
12986 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12987 * @depth: Used for loop detection, use 0
12988 * @URL: the URL for the entity to load
12989 * @ID: the System ID for the entity to load
12990 * @list: the return value for the set of parsed nodes
12991 *
12992 * Private version of xmlParseExternalEntity()
12993 *
12994 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995 * the parser error code otherwise
12996 */
12997
12998 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12999 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13000 xmlSAXHandlerPtr sax,
13001 void *user_data, int depth, const xmlChar *URL,
13002 const xmlChar *ID, xmlNodePtr *list) {
13003 xmlParserCtxtPtr ctxt;
13004 xmlDocPtr newDoc;
13005 xmlNodePtr newRoot;
13006 xmlSAXHandlerPtr oldsax = NULL;
13007 xmlParserErrors ret = XML_ERR_OK;
13008 xmlChar start[4];
13009 xmlCharEncoding enc;
13010
13011 if (((depth > 40) &&
13012 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13013 (depth > 1024)) {
13014 return(XML_ERR_ENTITY_LOOP);
13015 }
13016
13017 if (list != NULL)
13018 *list = NULL;
13019 if ((URL == NULL) && (ID == NULL))
13020 return(XML_ERR_INTERNAL_ERROR);
13021 if (doc == NULL)
13022 return(XML_ERR_INTERNAL_ERROR);
13023
13024
13025 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13026 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13027 ctxt->userData = ctxt;
13028 if (sax != NULL) {
13029 oldsax = ctxt->sax;
13030 ctxt->sax = sax;
13031 if (user_data != NULL)
13032 ctxt->userData = user_data;
13033 }
13034 xmlDetectSAX2(ctxt);
13035 newDoc = xmlNewDoc(BAD_CAST "1.0");
13036 if (newDoc == NULL) {
13037 xmlFreeParserCtxt(ctxt);
13038 return(XML_ERR_INTERNAL_ERROR);
13039 }
13040 newDoc->properties = XML_DOC_INTERNAL;
13041 if (doc) {
13042 newDoc->intSubset = doc->intSubset;
13043 newDoc->extSubset = doc->extSubset;
13044 if (doc->dict) {
13045 newDoc->dict = doc->dict;
13046 xmlDictReference(newDoc->dict);
13047 }
13048 if (doc->URL != NULL) {
13049 newDoc->URL = xmlStrdup(doc->URL);
13050 }
13051 }
13052 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13053 if (newRoot == NULL) {
13054 if (sax != NULL)
13055 ctxt->sax = oldsax;
13056 xmlFreeParserCtxt(ctxt);
13057 newDoc->intSubset = NULL;
13058 newDoc->extSubset = NULL;
13059 xmlFreeDoc(newDoc);
13060 return(XML_ERR_INTERNAL_ERROR);
13061 }
13062 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13063 nodePush(ctxt, newDoc->children);
13064 if (doc == NULL) {
13065 ctxt->myDoc = newDoc;
13066 } else {
13067 ctxt->myDoc = doc;
13068 newRoot->doc = doc;
13069 }
13070
13071 /*
13072 * Get the 4 first bytes and decode the charset
13073 * if enc != XML_CHAR_ENCODING_NONE
13074 * plug some encoding conversion routines.
13075 */
13076 GROW;
13077 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13078 start[0] = RAW;
13079 start[1] = NXT(1);
13080 start[2] = NXT(2);
13081 start[3] = NXT(3);
13082 enc = xmlDetectCharEncoding(start, 4);
13083 if (enc != XML_CHAR_ENCODING_NONE) {
13084 xmlSwitchEncoding(ctxt, enc);
13085 }
13086 }
13087
13088 /*
13089 * Parse a possible text declaration first
13090 */
13091 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13092 xmlParseTextDecl(ctxt);
13093 /*
13094 * An XML-1.0 document can't reference an entity not XML-1.0
13095 */
13096 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13097 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13098 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13099 "Version mismatch between document and entity\n");
13100 }
13101 }
13102
13103 ctxt->instate = XML_PARSER_CONTENT;
13104 ctxt->depth = depth;
13105 if (oldctxt != NULL) {
13106 ctxt->_private = oldctxt->_private;
13107 ctxt->loadsubset = oldctxt->loadsubset;
13108 ctxt->validate = oldctxt->validate;
13109 ctxt->valid = oldctxt->valid;
13110 ctxt->replaceEntities = oldctxt->replaceEntities;
13111 if (oldctxt->validate) {
13112 ctxt->vctxt.error = oldctxt->vctxt.error;
13113 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13114 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13115 }
13116 ctxt->external = oldctxt->external;
13117 if (ctxt->dict) xmlDictFree(ctxt->dict);
13118 ctxt->dict = oldctxt->dict;
13119 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13120 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13121 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13122 ctxt->dictNames = oldctxt->dictNames;
13123 ctxt->attsDefault = oldctxt->attsDefault;
13124 ctxt->attsSpecial = oldctxt->attsSpecial;
13125 ctxt->linenumbers = oldctxt->linenumbers;
13126 ctxt->record_info = oldctxt->record_info;
13127 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13128 ctxt->node_seq.length = oldctxt->node_seq.length;
13129 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13130 } else {
13131 /*
13132 * Doing validity checking on chunk without context
13133 * doesn't make sense
13134 */
13135 ctxt->_private = NULL;
13136 ctxt->validate = 0;
13137 ctxt->external = 2;
13138 ctxt->loadsubset = 0;
13139 }
13140
13141 xmlParseContent(ctxt);
13142
13143 if ((RAW == '<') && (NXT(1) == '/')) {
13144 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13145 } else if (RAW != 0) {
13146 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13147 }
13148 if (ctxt->node != newDoc->children) {
13149 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13150 }
13151
13152 if (!ctxt->wellFormed) {
13153 if (ctxt->errNo == 0)
13154 ret = XML_ERR_INTERNAL_ERROR;
13155 else
13156 ret = (xmlParserErrors)ctxt->errNo;
13157 } else {
13158 if (list != NULL) {
13159 xmlNodePtr cur;
13160
13161 /*
13162 * Return the newly created nodeset after unlinking it from
13163 * they pseudo parent.
13164 */
13165 cur = newDoc->children->children;
13166 *list = cur;
13167 while (cur != NULL) {
13168 cur->parent = NULL;
13169 cur = cur->next;
13170 }
13171 newDoc->children->children = NULL;
13172 }
13173 ret = XML_ERR_OK;
13174 }
13175
13176 /*
13177 * Record in the parent context the number of entities replacement
13178 * done when parsing that reference.
13179 */
13180 if (oldctxt != NULL)
13181 oldctxt->nbentities += ctxt->nbentities;
13182
13183 /*
13184 * Also record the size of the entity parsed
13185 */
13186 if (ctxt->input != NULL && oldctxt != NULL) {
13187 oldctxt->sizeentities += ctxt->input->consumed;
13188 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13189 }
13190 /*
13191 * And record the last error if any
13192 */
13193 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13194 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13195
13196 if (sax != NULL)
13197 ctxt->sax = oldsax;
13198 if (oldctxt != NULL) {
13199 ctxt->dict = NULL;
13200 ctxt->attsDefault = NULL;
13201 ctxt->attsSpecial = NULL;
13202 oldctxt->validate = ctxt->validate;
13203 oldctxt->valid = ctxt->valid;
13204 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13205 oldctxt->node_seq.length = ctxt->node_seq.length;
13206 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13207 }
13208 ctxt->node_seq.maximum = 0;
13209 ctxt->node_seq.length = 0;
13210 ctxt->node_seq.buffer = NULL;
13211 xmlFreeParserCtxt(ctxt);
13212 newDoc->intSubset = NULL;
13213 newDoc->extSubset = NULL;
13214 xmlFreeDoc(newDoc);
13215
13216 return(ret);
13217 }
13218
13219 #ifdef LIBXML_SAX1_ENABLED
13220 /**
13221 * xmlParseExternalEntity:
13222 * @doc: the document the chunk pertains to
13223 * @sax: the SAX handler block (possibly NULL)
13224 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13225 * @depth: Used for loop detection, use 0
13226 * @URL: the URL for the entity to load
13227 * @ID: the System ID for the entity to load
13228 * @lst: the return value for the set of parsed nodes
13229 *
13230 * Parse an external general entity
13231 * An external general parsed entity is well-formed if it matches the
13232 * production labeled extParsedEnt.
13233 *
13234 * [78] extParsedEnt ::= TextDecl? content
13235 *
13236 * Returns 0 if the entity is well formed, -1 in case of args problem and
13237 * the parser error code otherwise
13238 */
13239
13240 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13241 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13242 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13243 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13244 ID, lst));
13245 }
13246
13247 /**
13248 * xmlParseBalancedChunkMemory:
13249 * @doc: the document the chunk pertains to (must not be NULL)
13250 * @sax: the SAX handler block (possibly NULL)
13251 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13252 * @depth: Used for loop detection, use 0
13253 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13254 * @lst: the return value for the set of parsed nodes
13255 *
13256 * Parse a well-balanced chunk of an XML document
13257 * called by the parser
13258 * The allowed sequence for the Well Balanced Chunk is the one defined by
13259 * the content production in the XML grammar:
13260 *
13261 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13262 *
13263 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13264 * the parser error code otherwise
13265 */
13266
13267 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13268 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13269 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13270 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13271 depth, string, lst, 0 );
13272 }
13273 #endif /* LIBXML_SAX1_ENABLED */
13274
13275 /**
13276 * xmlParseBalancedChunkMemoryInternal:
13277 * @oldctxt: the existing parsing context
13278 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13279 * @user_data: the user data field for the parser context
13280 * @lst: the return value for the set of parsed nodes
13281 *
13282 *
13283 * Parse a well-balanced chunk of an XML document
13284 * called by the parser
13285 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286 * the content production in the XML grammar:
13287 *
13288 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289 *
13290 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13291 * error code otherwise
13292 *
13293 * In case recover is set to 1, the nodelist will not be empty even if
13294 * the parsed chunk is not well balanced.
13295 */
13296 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13297 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13298 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13299 xmlParserCtxtPtr ctxt;
13300 xmlDocPtr newDoc = NULL;
13301 xmlNodePtr newRoot;
13302 xmlSAXHandlerPtr oldsax = NULL;
13303 xmlNodePtr content = NULL;
13304 xmlNodePtr last = NULL;
13305 int size;
13306 xmlParserErrors ret = XML_ERR_OK;
13307 #ifdef SAX2
13308 int i;
13309 #endif
13310
13311 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13312 (oldctxt->depth > 1024)) {
13313 return(XML_ERR_ENTITY_LOOP);
13314 }
13315
13316
13317 if (lst != NULL)
13318 *lst = NULL;
13319 if (string == NULL)
13320 return(XML_ERR_INTERNAL_ERROR);
13321
13322 size = xmlStrlen(string);
13323
13324 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13325 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13326 if (user_data != NULL)
13327 ctxt->userData = user_data;
13328 else
13329 ctxt->userData = ctxt;
13330 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13331 ctxt->dict = oldctxt->dict;
13332 ctxt->input_id = oldctxt->input_id + 1;
13333 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13334 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13335 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13336
13337 #ifdef SAX2
13338 /* propagate namespaces down the entity */
13339 for (i = 0;i < oldctxt->nsNr;i += 2) {
13340 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13341 }
13342 #endif
13343
13344 oldsax = ctxt->sax;
13345 ctxt->sax = oldctxt->sax;
13346 xmlDetectSAX2(ctxt);
13347 ctxt->replaceEntities = oldctxt->replaceEntities;
13348 ctxt->options = oldctxt->options;
13349
13350 ctxt->_private = oldctxt->_private;
13351 if (oldctxt->myDoc == NULL) {
13352 newDoc = xmlNewDoc(BAD_CAST "1.0");
13353 if (newDoc == NULL) {
13354 ctxt->sax = oldsax;
13355 ctxt->dict = NULL;
13356 xmlFreeParserCtxt(ctxt);
13357 return(XML_ERR_INTERNAL_ERROR);
13358 }
13359 newDoc->properties = XML_DOC_INTERNAL;
13360 newDoc->dict = ctxt->dict;
13361 xmlDictReference(newDoc->dict);
13362 ctxt->myDoc = newDoc;
13363 } else {
13364 ctxt->myDoc = oldctxt->myDoc;
13365 content = ctxt->myDoc->children;
13366 last = ctxt->myDoc->last;
13367 }
13368 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13369 if (newRoot == NULL) {
13370 ctxt->sax = oldsax;
13371 ctxt->dict = NULL;
13372 xmlFreeParserCtxt(ctxt);
13373 if (newDoc != NULL) {
13374 xmlFreeDoc(newDoc);
13375 }
13376 return(XML_ERR_INTERNAL_ERROR);
13377 }
13378 ctxt->myDoc->children = NULL;
13379 ctxt->myDoc->last = NULL;
13380 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13381 nodePush(ctxt, ctxt->myDoc->children);
13382 ctxt->instate = XML_PARSER_CONTENT;
13383 ctxt->depth = oldctxt->depth + 1;
13384
13385 ctxt->validate = 0;
13386 ctxt->loadsubset = oldctxt->loadsubset;
13387 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13388 /*
13389 * ID/IDREF registration will be done in xmlValidateElement below
13390 */
13391 ctxt->loadsubset |= XML_SKIP_IDS;
13392 }
13393 ctxt->dictNames = oldctxt->dictNames;
13394 ctxt->attsDefault = oldctxt->attsDefault;
13395 ctxt->attsSpecial = oldctxt->attsSpecial;
13396
13397 xmlParseContent(ctxt);
13398 if ((RAW == '<') && (NXT(1) == '/')) {
13399 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13400 } else if (RAW != 0) {
13401 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13402 }
13403 if (ctxt->node != ctxt->myDoc->children) {
13404 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13405 }
13406
13407 if (!ctxt->wellFormed) {
13408 if (ctxt->errNo == 0)
13409 ret = XML_ERR_INTERNAL_ERROR;
13410 else
13411 ret = (xmlParserErrors)ctxt->errNo;
13412 } else {
13413 ret = XML_ERR_OK;
13414 }
13415
13416 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13417 xmlNodePtr cur;
13418
13419 /*
13420 * Return the newly created nodeset after unlinking it from
13421 * they pseudo parent.
13422 */
13423 cur = ctxt->myDoc->children->children;
13424 *lst = cur;
13425 while (cur != NULL) {
13426 #ifdef LIBXML_VALID_ENABLED
13427 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13428 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13429 (cur->type == XML_ELEMENT_NODE)) {
13430 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13431 oldctxt->myDoc, cur);
13432 }
13433 #endif /* LIBXML_VALID_ENABLED */
13434 cur->parent = NULL;
13435 cur = cur->next;
13436 }
13437 ctxt->myDoc->children->children = NULL;
13438 }
13439 if (ctxt->myDoc != NULL) {
13440 xmlFreeNode(ctxt->myDoc->children);
13441 ctxt->myDoc->children = content;
13442 ctxt->myDoc->last = last;
13443 }
13444
13445 /*
13446 * Record in the parent context the number of entities replacement
13447 * done when parsing that reference.
13448 */
13449 if (oldctxt != NULL)
13450 oldctxt->nbentities += ctxt->nbentities;
13451
13452 /*
13453 * Also record the last error if any
13454 */
13455 if (ctxt->lastError.code != XML_ERR_OK)
13456 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13457
13458 ctxt->sax = oldsax;
13459 ctxt->dict = NULL;
13460 ctxt->attsDefault = NULL;
13461 ctxt->attsSpecial = NULL;
13462 xmlFreeParserCtxt(ctxt);
13463 if (newDoc != NULL) {
13464 xmlFreeDoc(newDoc);
13465 }
13466
13467 return(ret);
13468 }
13469
13470 /**
13471 * xmlParseInNodeContext:
13472 * @node: the context node
13473 * @data: the input string
13474 * @datalen: the input string length in bytes
13475 * @options: a combination of xmlParserOption
13476 * @lst: the return value for the set of parsed nodes
13477 *
13478 * Parse a well-balanced chunk of an XML document
13479 * within the context (DTD, namespaces, etc ...) of the given node.
13480 *
13481 * The allowed sequence for the data is a Well Balanced Chunk defined by
13482 * the content production in the XML grammar:
13483 *
13484 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13485 *
13486 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13487 * error code otherwise
13488 */
13489 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13490 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13491 int options, xmlNodePtr *lst) {
13492 #ifdef SAX2
13493 xmlParserCtxtPtr ctxt;
13494 xmlDocPtr doc = NULL;
13495 xmlNodePtr fake, cur;
13496 int nsnr = 0;
13497
13498 xmlParserErrors ret = XML_ERR_OK;
13499
13500 /*
13501 * check all input parameters, grab the document
13502 */
13503 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13504 return(XML_ERR_INTERNAL_ERROR);
13505 switch (node->type) {
13506 case XML_ELEMENT_NODE:
13507 case XML_ATTRIBUTE_NODE:
13508 case XML_TEXT_NODE:
13509 case XML_CDATA_SECTION_NODE:
13510 case XML_ENTITY_REF_NODE:
13511 case XML_PI_NODE:
13512 case XML_COMMENT_NODE:
13513 case XML_DOCUMENT_NODE:
13514 case XML_HTML_DOCUMENT_NODE:
13515 break;
13516 default:
13517 return(XML_ERR_INTERNAL_ERROR);
13518
13519 }
13520 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13521 (node->type != XML_DOCUMENT_NODE) &&
13522 (node->type != XML_HTML_DOCUMENT_NODE))
13523 node = node->parent;
13524 if (node == NULL)
13525 return(XML_ERR_INTERNAL_ERROR);
13526 if (node->type == XML_ELEMENT_NODE)
13527 doc = node->doc;
13528 else
13529 doc = (xmlDocPtr) node;
13530 if (doc == NULL)
13531 return(XML_ERR_INTERNAL_ERROR);
13532
13533 /*
13534 * allocate a context and set-up everything not related to the
13535 * node position in the tree
13536 */
13537 if (doc->type == XML_DOCUMENT_NODE)
13538 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13539 #ifdef LIBXML_HTML_ENABLED
13540 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13541 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13542 /*
13543 * When parsing in context, it makes no sense to add implied
13544 * elements like html/body/etc...
13545 */
13546 options |= HTML_PARSE_NOIMPLIED;
13547 }
13548 #endif
13549 else
13550 return(XML_ERR_INTERNAL_ERROR);
13551
13552 if (ctxt == NULL)
13553 return(XML_ERR_NO_MEMORY);
13554
13555 /*
13556 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13557 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13558 * we must wait until the last moment to free the original one.
13559 */
13560 if (doc->dict != NULL) {
13561 if (ctxt->dict != NULL)
13562 xmlDictFree(ctxt->dict);
13563 ctxt->dict = doc->dict;
13564 } else
13565 options |= XML_PARSE_NODICT;
13566
13567 if (doc->encoding != NULL) {
13568 xmlCharEncodingHandlerPtr hdlr;
13569
13570 if (ctxt->encoding != NULL)
13571 xmlFree((xmlChar *) ctxt->encoding);
13572 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13573
13574 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13575 if (hdlr != NULL) {
13576 xmlSwitchToEncoding(ctxt, hdlr);
13577 } else {
13578 return(XML_ERR_UNSUPPORTED_ENCODING);
13579 }
13580 }
13581
13582 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13583 xmlDetectSAX2(ctxt);
13584 ctxt->myDoc = doc;
13585 /* parsing in context, i.e. as within existing content */
13586 ctxt->input_id = 2;
13587 ctxt->instate = XML_PARSER_CONTENT;
13588
13589 fake = xmlNewComment(NULL);
13590 if (fake == NULL) {
13591 xmlFreeParserCtxt(ctxt);
13592 return(XML_ERR_NO_MEMORY);
13593 }
13594 xmlAddChild(node, fake);
13595
13596 if (node->type == XML_ELEMENT_NODE) {
13597 nodePush(ctxt, node);
13598 /*
13599 * initialize the SAX2 namespaces stack
13600 */
13601 cur = node;
13602 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13603 xmlNsPtr ns = cur->nsDef;
13604 const xmlChar *iprefix, *ihref;
13605
13606 while (ns != NULL) {
13607 if (ctxt->dict) {
13608 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13609 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13610 } else {
13611 iprefix = ns->prefix;
13612 ihref = ns->href;
13613 }
13614
13615 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13616 nsPush(ctxt, iprefix, ihref);
13617 nsnr++;
13618 }
13619 ns = ns->next;
13620 }
13621 cur = cur->parent;
13622 }
13623 }
13624
13625 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13626 /*
13627 * ID/IDREF registration will be done in xmlValidateElement below
13628 */
13629 ctxt->loadsubset |= XML_SKIP_IDS;
13630 }
13631
13632 #ifdef LIBXML_HTML_ENABLED
13633 if (doc->type == XML_HTML_DOCUMENT_NODE)
13634 __htmlParseContent(ctxt);
13635 else
13636 #endif
13637 xmlParseContent(ctxt);
13638
13639 nsPop(ctxt, nsnr);
13640 if ((RAW == '<') && (NXT(1) == '/')) {
13641 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13642 } else if (RAW != 0) {
13643 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13644 }
13645 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13646 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13647 ctxt->wellFormed = 0;
13648 }
13649
13650 if (!ctxt->wellFormed) {
13651 if (ctxt->errNo == 0)
13652 ret = XML_ERR_INTERNAL_ERROR;
13653 else
13654 ret = (xmlParserErrors)ctxt->errNo;
13655 } else {
13656 ret = XML_ERR_OK;
13657 }
13658
13659 /*
13660 * Return the newly created nodeset after unlinking it from
13661 * the pseudo sibling.
13662 */
13663
13664 cur = fake->next;
13665 fake->next = NULL;
13666 node->last = fake;
13667
13668 if (cur != NULL) {
13669 cur->prev = NULL;
13670 }
13671
13672 *lst = cur;
13673
13674 while (cur != NULL) {
13675 cur->parent = NULL;
13676 cur = cur->next;
13677 }
13678
13679 xmlUnlinkNode(fake);
13680 xmlFreeNode(fake);
13681
13682
13683 if (ret != XML_ERR_OK) {
13684 xmlFreeNodeList(*lst);
13685 *lst = NULL;
13686 }
13687
13688 if (doc->dict != NULL)
13689 ctxt->dict = NULL;
13690 xmlFreeParserCtxt(ctxt);
13691
13692 return(ret);
13693 #else /* !SAX2 */
13694 return(XML_ERR_INTERNAL_ERROR);
13695 #endif
13696 }
13697
13698 #ifdef LIBXML_SAX1_ENABLED
13699 /**
13700 * xmlParseBalancedChunkMemoryRecover:
13701 * @doc: the document the chunk pertains to (must not be NULL)
13702 * @sax: the SAX handler block (possibly NULL)
13703 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13704 * @depth: Used for loop detection, use 0
13705 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13706 * @lst: the return value for the set of parsed nodes
13707 * @recover: return nodes even if the data is broken (use 0)
13708 *
13709 *
13710 * Parse a well-balanced chunk of an XML document
13711 * called by the parser
13712 * The allowed sequence for the Well Balanced Chunk is the one defined by
13713 * the content production in the XML grammar:
13714 *
13715 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13716 *
13717 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13718 * the parser error code otherwise
13719 *
13720 * In case recover is set to 1, the nodelist will not be empty even if
13721 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13722 * some extent.
13723 */
13724 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13725 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13726 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13727 int recover) {
13728 xmlParserCtxtPtr ctxt;
13729 xmlDocPtr newDoc;
13730 xmlSAXHandlerPtr oldsax = NULL;
13731 xmlNodePtr content, newRoot;
13732 int size;
13733 int ret = 0;
13734
13735 if (depth > 40) {
13736 return(XML_ERR_ENTITY_LOOP);
13737 }
13738
13739
13740 if (lst != NULL)
13741 *lst = NULL;
13742 if (string == NULL)
13743 return(-1);
13744
13745 size = xmlStrlen(string);
13746
13747 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13748 if (ctxt == NULL) return(-1);
13749 ctxt->userData = ctxt;
13750 if (sax != NULL) {
13751 oldsax = ctxt->sax;
13752 ctxt->sax = sax;
13753 if (user_data != NULL)
13754 ctxt->userData = user_data;
13755 }
13756 newDoc = xmlNewDoc(BAD_CAST "1.0");
13757 if (newDoc == NULL) {
13758 xmlFreeParserCtxt(ctxt);
13759 return(-1);
13760 }
13761 newDoc->properties = XML_DOC_INTERNAL;
13762 if ((doc != NULL) && (doc->dict != NULL)) {
13763 xmlDictFree(ctxt->dict);
13764 ctxt->dict = doc->dict;
13765 xmlDictReference(ctxt->dict);
13766 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13767 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13768 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13769 ctxt->dictNames = 1;
13770 } else {
13771 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13772 }
13773 /* doc == NULL is only supported for historic reasons */
13774 if (doc != NULL) {
13775 newDoc->intSubset = doc->intSubset;
13776 newDoc->extSubset = doc->extSubset;
13777 }
13778 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13779 if (newRoot == NULL) {
13780 if (sax != NULL)
13781 ctxt->sax = oldsax;
13782 xmlFreeParserCtxt(ctxt);
13783 newDoc->intSubset = NULL;
13784 newDoc->extSubset = NULL;
13785 xmlFreeDoc(newDoc);
13786 return(-1);
13787 }
13788 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13789 nodePush(ctxt, newRoot);
13790 /* doc == NULL is only supported for historic reasons */
13791 if (doc == NULL) {
13792 ctxt->myDoc = newDoc;
13793 } else {
13794 ctxt->myDoc = newDoc;
13795 newDoc->children->doc = doc;
13796 /* Ensure that doc has XML spec namespace */
13797 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13798 newDoc->oldNs = doc->oldNs;
13799 }
13800 ctxt->instate = XML_PARSER_CONTENT;
13801 ctxt->input_id = 2;
13802 ctxt->depth = depth;
13803
13804 /*
13805 * Doing validity checking on chunk doesn't make sense
13806 */
13807 ctxt->validate = 0;
13808 ctxt->loadsubset = 0;
13809 xmlDetectSAX2(ctxt);
13810
13811 if ( doc != NULL ){
13812 content = doc->children;
13813 doc->children = NULL;
13814 xmlParseContent(ctxt);
13815 doc->children = content;
13816 }
13817 else {
13818 xmlParseContent(ctxt);
13819 }
13820 if ((RAW == '<') && (NXT(1) == '/')) {
13821 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13822 } else if (RAW != 0) {
13823 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13824 }
13825 if (ctxt->node != newDoc->children) {
13826 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13827 }
13828
13829 if (!ctxt->wellFormed) {
13830 if (ctxt->errNo == 0)
13831 ret = 1;
13832 else
13833 ret = ctxt->errNo;
13834 } else {
13835 ret = 0;
13836 }
13837
13838 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13839 xmlNodePtr cur;
13840
13841 /*
13842 * Return the newly created nodeset after unlinking it from
13843 * they pseudo parent.
13844 */
13845 cur = newDoc->children->children;
13846 *lst = cur;
13847 while (cur != NULL) {
13848 xmlSetTreeDoc(cur, doc);
13849 cur->parent = NULL;
13850 cur = cur->next;
13851 }
13852 newDoc->children->children = NULL;
13853 }
13854
13855 if (sax != NULL)
13856 ctxt->sax = oldsax;
13857 xmlFreeParserCtxt(ctxt);
13858 newDoc->intSubset = NULL;
13859 newDoc->extSubset = NULL;
13860 /* This leaks the namespace list if doc == NULL */
13861 newDoc->oldNs = NULL;
13862 xmlFreeDoc(newDoc);
13863
13864 return(ret);
13865 }
13866
13867 /**
13868 * xmlSAXParseEntity:
13869 * @sax: the SAX handler block
13870 * @filename: the filename
13871 *
13872 * parse an XML external entity out of context and build a tree.
13873 * It use the given SAX function block to handle the parsing callback.
13874 * If sax is NULL, fallback to the default DOM tree building routines.
13875 *
13876 * [78] extParsedEnt ::= TextDecl? content
13877 *
13878 * This correspond to a "Well Balanced" chunk
13879 *
13880 * Returns the resulting document tree
13881 */
13882
13883 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13884 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13885 xmlDocPtr ret;
13886 xmlParserCtxtPtr ctxt;
13887
13888 ctxt = xmlCreateFileParserCtxt(filename);
13889 if (ctxt == NULL) {
13890 return(NULL);
13891 }
13892 if (sax != NULL) {
13893 if (ctxt->sax != NULL)
13894 xmlFree(ctxt->sax);
13895 ctxt->sax = sax;
13896 ctxt->userData = NULL;
13897 }
13898
13899 xmlParseExtParsedEnt(ctxt);
13900
13901 if (ctxt->wellFormed)
13902 ret = ctxt->myDoc;
13903 else {
13904 ret = NULL;
13905 xmlFreeDoc(ctxt->myDoc);
13906 ctxt->myDoc = NULL;
13907 }
13908 if (sax != NULL)
13909 ctxt->sax = NULL;
13910 xmlFreeParserCtxt(ctxt);
13911
13912 return(ret);
13913 }
13914
13915 /**
13916 * xmlParseEntity:
13917 * @filename: the filename
13918 *
13919 * parse an XML external entity out of context and build a tree.
13920 *
13921 * [78] extParsedEnt ::= TextDecl? content
13922 *
13923 * This correspond to a "Well Balanced" chunk
13924 *
13925 * Returns the resulting document tree
13926 */
13927
13928 xmlDocPtr
xmlParseEntity(const char * filename)13929 xmlParseEntity(const char *filename) {
13930 return(xmlSAXParseEntity(NULL, filename));
13931 }
13932 #endif /* LIBXML_SAX1_ENABLED */
13933
13934 /**
13935 * xmlCreateEntityParserCtxtInternal:
13936 * @URL: the entity URL
13937 * @ID: the entity PUBLIC ID
13938 * @base: a possible base for the target URI
13939 * @pctx: parser context used to set options on new context
13940 *
13941 * Create a parser context for an external entity
13942 * Automatic support for ZLIB/Compress compressed document is provided
13943 * by default if found at compile-time.
13944 *
13945 * Returns the new parser context or NULL
13946 */
13947 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13948 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13949 const xmlChar *base, xmlParserCtxtPtr pctx) {
13950 xmlParserCtxtPtr ctxt;
13951 xmlParserInputPtr inputStream;
13952 char *directory = NULL;
13953 xmlChar *uri;
13954
13955 ctxt = xmlNewParserCtxt();
13956 if (ctxt == NULL) {
13957 return(NULL);
13958 }
13959
13960 if (pctx != NULL) {
13961 ctxt->options = pctx->options;
13962 ctxt->_private = pctx->_private;
13963 /*
13964 * this is a subparser of pctx, so the input_id should be
13965 * incremented to distinguish from main entity
13966 */
13967 ctxt->input_id = pctx->input_id + 1;
13968 }
13969
13970 /* Don't read from stdin. */
13971 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13972 URL = BAD_CAST "./-";
13973
13974 uri = xmlBuildURI(URL, base);
13975
13976 if (uri == NULL) {
13977 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13978 if (inputStream == NULL) {
13979 xmlFreeParserCtxt(ctxt);
13980 return(NULL);
13981 }
13982
13983 inputPush(ctxt, inputStream);
13984
13985 if ((ctxt->directory == NULL) && (directory == NULL))
13986 directory = xmlParserGetDirectory((char *)URL);
13987 if ((ctxt->directory == NULL) && (directory != NULL))
13988 ctxt->directory = directory;
13989 } else {
13990 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13991 if (inputStream == NULL) {
13992 xmlFree(uri);
13993 xmlFreeParserCtxt(ctxt);
13994 return(NULL);
13995 }
13996
13997 inputPush(ctxt, inputStream);
13998
13999 if ((ctxt->directory == NULL) && (directory == NULL))
14000 directory = xmlParserGetDirectory((char *)uri);
14001 if ((ctxt->directory == NULL) && (directory != NULL))
14002 ctxt->directory = directory;
14003 xmlFree(uri);
14004 }
14005 return(ctxt);
14006 }
14007
14008 /**
14009 * xmlCreateEntityParserCtxt:
14010 * @URL: the entity URL
14011 * @ID: the entity PUBLIC ID
14012 * @base: a possible base for the target URI
14013 *
14014 * Create a parser context for an external entity
14015 * Automatic support for ZLIB/Compress compressed document is provided
14016 * by default if found at compile-time.
14017 *
14018 * Returns the new parser context or NULL
14019 */
14020 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14021 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14022 const xmlChar *base) {
14023 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14024
14025 }
14026
14027 /************************************************************************
14028 * *
14029 * Front ends when parsing from a file *
14030 * *
14031 ************************************************************************/
14032
14033 /**
14034 * xmlCreateURLParserCtxt:
14035 * @filename: the filename or URL
14036 * @options: a combination of xmlParserOption
14037 *
14038 * Create a parser context for a file or URL content.
14039 * Automatic support for ZLIB/Compress compressed document is provided
14040 * by default if found at compile-time and for file accesses
14041 *
14042 * Returns the new parser context or NULL
14043 */
14044 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14045 xmlCreateURLParserCtxt(const char *filename, int options)
14046 {
14047 xmlParserCtxtPtr ctxt;
14048 xmlParserInputPtr inputStream;
14049 char *directory = NULL;
14050
14051 ctxt = xmlNewParserCtxt();
14052 if (ctxt == NULL) {
14053 xmlErrMemory(NULL, "cannot allocate parser context");
14054 return(NULL);
14055 }
14056
14057 if (options)
14058 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14059 ctxt->linenumbers = 1;
14060
14061 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14062 if (inputStream == NULL) {
14063 xmlFreeParserCtxt(ctxt);
14064 return(NULL);
14065 }
14066
14067 inputPush(ctxt, inputStream);
14068 if ((ctxt->directory == NULL) && (directory == NULL))
14069 directory = xmlParserGetDirectory(filename);
14070 if ((ctxt->directory == NULL) && (directory != NULL))
14071 ctxt->directory = directory;
14072
14073 return(ctxt);
14074 }
14075
14076 /**
14077 * xmlCreateFileParserCtxt:
14078 * @filename: the filename
14079 *
14080 * Create a parser context for a file content.
14081 * Automatic support for ZLIB/Compress compressed document is provided
14082 * by default if found at compile-time.
14083 *
14084 * Returns the new parser context or NULL
14085 */
14086 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14087 xmlCreateFileParserCtxt(const char *filename)
14088 {
14089 return(xmlCreateURLParserCtxt(filename, 0));
14090 }
14091
14092 #ifdef LIBXML_SAX1_ENABLED
14093 /**
14094 * xmlSAXParseFileWithData:
14095 * @sax: the SAX handler block
14096 * @filename: the filename
14097 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14098 * documents
14099 * @data: the userdata
14100 *
14101 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14102 * compressed document is provided by default if found at compile-time.
14103 * It use the given SAX function block to handle the parsing callback.
14104 * If sax is NULL, fallback to the default DOM tree building routines.
14105 *
14106 * User data (void *) is stored within the parser context in the
14107 * context's _private member, so it is available nearly everywhere in libxml
14108 *
14109 * Returns the resulting document tree
14110 */
14111
14112 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14113 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14114 int recovery, void *data) {
14115 xmlDocPtr ret;
14116 xmlParserCtxtPtr ctxt;
14117
14118 xmlInitParser();
14119
14120 ctxt = xmlCreateFileParserCtxt(filename);
14121 if (ctxt == NULL) {
14122 return(NULL);
14123 }
14124 if (sax != NULL) {
14125 if (ctxt->sax != NULL)
14126 xmlFree(ctxt->sax);
14127 ctxt->sax = sax;
14128 }
14129 xmlDetectSAX2(ctxt);
14130 if (data!=NULL) {
14131 ctxt->_private = data;
14132 }
14133
14134 if (ctxt->directory == NULL)
14135 ctxt->directory = xmlParserGetDirectory(filename);
14136
14137 ctxt->recovery = recovery;
14138
14139 xmlParseDocument(ctxt);
14140
14141 if ((ctxt->wellFormed) || recovery) {
14142 ret = ctxt->myDoc;
14143 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14144 if (ctxt->input->buf->compressed > 0)
14145 ret->compression = 9;
14146 else
14147 ret->compression = ctxt->input->buf->compressed;
14148 }
14149 }
14150 else {
14151 ret = NULL;
14152 xmlFreeDoc(ctxt->myDoc);
14153 ctxt->myDoc = NULL;
14154 }
14155 if (sax != NULL)
14156 ctxt->sax = NULL;
14157 xmlFreeParserCtxt(ctxt);
14158
14159 return(ret);
14160 }
14161
14162 /**
14163 * xmlSAXParseFile:
14164 * @sax: the SAX handler block
14165 * @filename: the filename
14166 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14167 * documents
14168 *
14169 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14170 * compressed document is provided by default if found at compile-time.
14171 * It use the given SAX function block to handle the parsing callback.
14172 * If sax is NULL, fallback to the default DOM tree building routines.
14173 *
14174 * Returns the resulting document tree
14175 */
14176
14177 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14178 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14179 int recovery) {
14180 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14181 }
14182
14183 /**
14184 * xmlRecoverDoc:
14185 * @cur: a pointer to an array of xmlChar
14186 *
14187 * parse an XML in-memory document and build a tree.
14188 * In the case the document is not Well Formed, a attempt to build a
14189 * tree is tried anyway
14190 *
14191 * Returns the resulting document tree or NULL in case of failure
14192 */
14193
14194 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14195 xmlRecoverDoc(const xmlChar *cur) {
14196 return(xmlSAXParseDoc(NULL, cur, 1));
14197 }
14198
14199 /**
14200 * xmlParseFile:
14201 * @filename: the filename
14202 *
14203 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204 * compressed document is provided by default if found at compile-time.
14205 *
14206 * Returns the resulting document tree if the file was wellformed,
14207 * NULL otherwise.
14208 */
14209
14210 xmlDocPtr
xmlParseFile(const char * filename)14211 xmlParseFile(const char *filename) {
14212 return(xmlSAXParseFile(NULL, filename, 0));
14213 }
14214
14215 /**
14216 * xmlRecoverFile:
14217 * @filename: the filename
14218 *
14219 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14220 * compressed document is provided by default if found at compile-time.
14221 * In the case the document is not Well Formed, it attempts to build
14222 * a tree anyway
14223 *
14224 * Returns the resulting document tree or NULL in case of failure
14225 */
14226
14227 xmlDocPtr
xmlRecoverFile(const char * filename)14228 xmlRecoverFile(const char *filename) {
14229 return(xmlSAXParseFile(NULL, filename, 1));
14230 }
14231
14232
14233 /**
14234 * xmlSetupParserForBuffer:
14235 * @ctxt: an XML parser context
14236 * @buffer: a xmlChar * buffer
14237 * @filename: a file name
14238 *
14239 * Setup the parser context to parse a new buffer; Clears any prior
14240 * contents from the parser context. The buffer parameter must not be
14241 * NULL, but the filename parameter can be
14242 */
14243 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14244 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14245 const char* filename)
14246 {
14247 xmlParserInputPtr input;
14248
14249 if ((ctxt == NULL) || (buffer == NULL))
14250 return;
14251
14252 input = xmlNewInputStream(ctxt);
14253 if (input == NULL) {
14254 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14255 xmlClearParserCtxt(ctxt);
14256 return;
14257 }
14258
14259 xmlClearParserCtxt(ctxt);
14260 if (filename != NULL)
14261 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14262 input->base = buffer;
14263 input->cur = buffer;
14264 input->end = &buffer[xmlStrlen(buffer)];
14265 inputPush(ctxt, input);
14266 }
14267
14268 /**
14269 * xmlSAXUserParseFile:
14270 * @sax: a SAX handler
14271 * @user_data: The user data returned on SAX callbacks
14272 * @filename: a file name
14273 *
14274 * parse an XML file and call the given SAX handler routines.
14275 * Automatic support for ZLIB/Compress compressed document is provided
14276 *
14277 * Returns 0 in case of success or a error number otherwise
14278 */
14279 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14280 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14281 const char *filename) {
14282 int ret = 0;
14283 xmlParserCtxtPtr ctxt;
14284
14285 ctxt = xmlCreateFileParserCtxt(filename);
14286 if (ctxt == NULL) return -1;
14287 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14288 xmlFree(ctxt->sax);
14289 ctxt->sax = sax;
14290 xmlDetectSAX2(ctxt);
14291
14292 if (user_data != NULL)
14293 ctxt->userData = user_data;
14294
14295 xmlParseDocument(ctxt);
14296
14297 if (ctxt->wellFormed)
14298 ret = 0;
14299 else {
14300 if (ctxt->errNo != 0)
14301 ret = ctxt->errNo;
14302 else
14303 ret = -1;
14304 }
14305 if (sax != NULL)
14306 ctxt->sax = NULL;
14307 if (ctxt->myDoc != NULL) {
14308 xmlFreeDoc(ctxt->myDoc);
14309 ctxt->myDoc = NULL;
14310 }
14311 xmlFreeParserCtxt(ctxt);
14312
14313 return ret;
14314 }
14315 #endif /* LIBXML_SAX1_ENABLED */
14316
14317 /************************************************************************
14318 * *
14319 * Front ends when parsing from memory *
14320 * *
14321 ************************************************************************/
14322
14323 /**
14324 * xmlCreateMemoryParserCtxt:
14325 * @buffer: a pointer to a char array
14326 * @size: the size of the array
14327 *
14328 * Create a parser context for an XML in-memory document.
14329 *
14330 * Returns the new parser context or NULL
14331 */
14332 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14333 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14334 xmlParserCtxtPtr ctxt;
14335 xmlParserInputPtr input;
14336 xmlParserInputBufferPtr buf;
14337
14338 if (buffer == NULL)
14339 return(NULL);
14340 if (size <= 0)
14341 return(NULL);
14342
14343 ctxt = xmlNewParserCtxt();
14344 if (ctxt == NULL)
14345 return(NULL);
14346
14347 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14348 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14349 if (buf == NULL) {
14350 xmlFreeParserCtxt(ctxt);
14351 return(NULL);
14352 }
14353
14354 input = xmlNewInputStream(ctxt);
14355 if (input == NULL) {
14356 xmlFreeParserInputBuffer(buf);
14357 xmlFreeParserCtxt(ctxt);
14358 return(NULL);
14359 }
14360
14361 input->filename = NULL;
14362 input->buf = buf;
14363 xmlBufResetInput(input->buf->buffer, input);
14364
14365 inputPush(ctxt, input);
14366 return(ctxt);
14367 }
14368
14369 #ifdef LIBXML_SAX1_ENABLED
14370 /**
14371 * xmlSAXParseMemoryWithData:
14372 * @sax: the SAX handler block
14373 * @buffer: an pointer to a char array
14374 * @size: the size of the array
14375 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14376 * documents
14377 * @data: the userdata
14378 *
14379 * parse an XML in-memory block and use the given SAX function block
14380 * to handle the parsing callback. If sax is NULL, fallback to the default
14381 * DOM tree building routines.
14382 *
14383 * User data (void *) is stored within the parser context in the
14384 * context's _private member, so it is available nearly everywhere in libxml
14385 *
14386 * Returns the resulting document tree
14387 */
14388
14389 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14390 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14391 int size, int recovery, void *data) {
14392 xmlDocPtr ret;
14393 xmlParserCtxtPtr ctxt;
14394
14395 xmlInitParser();
14396
14397 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14398 if (ctxt == NULL) return(NULL);
14399 if (sax != NULL) {
14400 if (ctxt->sax != NULL)
14401 xmlFree(ctxt->sax);
14402 ctxt->sax = sax;
14403 }
14404 xmlDetectSAX2(ctxt);
14405 if (data!=NULL) {
14406 ctxt->_private=data;
14407 }
14408
14409 ctxt->recovery = recovery;
14410
14411 xmlParseDocument(ctxt);
14412
14413 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14414 else {
14415 ret = NULL;
14416 xmlFreeDoc(ctxt->myDoc);
14417 ctxt->myDoc = NULL;
14418 }
14419 if (sax != NULL)
14420 ctxt->sax = NULL;
14421 xmlFreeParserCtxt(ctxt);
14422
14423 return(ret);
14424 }
14425
14426 /**
14427 * xmlSAXParseMemory:
14428 * @sax: the SAX handler block
14429 * @buffer: an pointer to a char array
14430 * @size: the size of the array
14431 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14432 * documents
14433 *
14434 * parse an XML in-memory block and use the given SAX function block
14435 * to handle the parsing callback. If sax is NULL, fallback to the default
14436 * DOM tree building routines.
14437 *
14438 * Returns the resulting document tree
14439 */
14440 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14441 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14442 int size, int recovery) {
14443 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14444 }
14445
14446 /**
14447 * xmlParseMemory:
14448 * @buffer: an pointer to a char array
14449 * @size: the size of the array
14450 *
14451 * parse an XML in-memory block and build a tree.
14452 *
14453 * Returns the resulting document tree
14454 */
14455
xmlParseMemory(const char * buffer,int size)14456 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14457 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14458 }
14459
14460 /**
14461 * xmlRecoverMemory:
14462 * @buffer: an pointer to a char array
14463 * @size: the size of the array
14464 *
14465 * parse an XML in-memory block and build a tree.
14466 * In the case the document is not Well Formed, an attempt to
14467 * build a tree is tried anyway
14468 *
14469 * Returns the resulting document tree or NULL in case of error
14470 */
14471
xmlRecoverMemory(const char * buffer,int size)14472 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14473 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14474 }
14475
14476 /**
14477 * xmlSAXUserParseMemory:
14478 * @sax: a SAX handler
14479 * @user_data: The user data returned on SAX callbacks
14480 * @buffer: an in-memory XML document input
14481 * @size: the length of the XML document in bytes
14482 *
14483 * A better SAX parsing routine.
14484 * parse an XML in-memory buffer and call the given SAX handler routines.
14485 *
14486 * Returns 0 in case of success or a error number otherwise
14487 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14488 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14489 const char *buffer, int size) {
14490 int ret = 0;
14491 xmlParserCtxtPtr ctxt;
14492
14493 xmlInitParser();
14494
14495 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14496 if (ctxt == NULL) return -1;
14497 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14498 xmlFree(ctxt->sax);
14499 ctxt->sax = sax;
14500 xmlDetectSAX2(ctxt);
14501
14502 if (user_data != NULL)
14503 ctxt->userData = user_data;
14504
14505 xmlParseDocument(ctxt);
14506
14507 if (ctxt->wellFormed)
14508 ret = 0;
14509 else {
14510 if (ctxt->errNo != 0)
14511 ret = ctxt->errNo;
14512 else
14513 ret = -1;
14514 }
14515 if (sax != NULL)
14516 ctxt->sax = NULL;
14517 if (ctxt->myDoc != NULL) {
14518 xmlFreeDoc(ctxt->myDoc);
14519 ctxt->myDoc = NULL;
14520 }
14521 xmlFreeParserCtxt(ctxt);
14522
14523 return ret;
14524 }
14525 #endif /* LIBXML_SAX1_ENABLED */
14526
14527 /**
14528 * xmlCreateDocParserCtxt:
14529 * @cur: a pointer to an array of xmlChar
14530 *
14531 * Creates a parser context for an XML in-memory document.
14532 *
14533 * Returns the new parser context or NULL
14534 */
14535 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14536 xmlCreateDocParserCtxt(const xmlChar *cur) {
14537 int len;
14538
14539 if (cur == NULL)
14540 return(NULL);
14541 len = xmlStrlen(cur);
14542 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14543 }
14544
14545 #ifdef LIBXML_SAX1_ENABLED
14546 /**
14547 * xmlSAXParseDoc:
14548 * @sax: the SAX handler block
14549 * @cur: a pointer to an array of xmlChar
14550 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14551 * documents
14552 *
14553 * parse an XML in-memory document and build a tree.
14554 * It use the given SAX function block to handle the parsing callback.
14555 * If sax is NULL, fallback to the default DOM tree building routines.
14556 *
14557 * Returns the resulting document tree
14558 */
14559
14560 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14561 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14562 xmlDocPtr ret;
14563 xmlParserCtxtPtr ctxt;
14564 xmlSAXHandlerPtr oldsax = NULL;
14565
14566 if (cur == NULL) return(NULL);
14567
14568
14569 ctxt = xmlCreateDocParserCtxt(cur);
14570 if (ctxt == NULL) return(NULL);
14571 if (sax != NULL) {
14572 oldsax = ctxt->sax;
14573 ctxt->sax = sax;
14574 ctxt->userData = NULL;
14575 }
14576 xmlDetectSAX2(ctxt);
14577
14578 xmlParseDocument(ctxt);
14579 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14580 else {
14581 ret = NULL;
14582 xmlFreeDoc(ctxt->myDoc);
14583 ctxt->myDoc = NULL;
14584 }
14585 if (sax != NULL)
14586 ctxt->sax = oldsax;
14587 xmlFreeParserCtxt(ctxt);
14588
14589 return(ret);
14590 }
14591
14592 /**
14593 * xmlParseDoc:
14594 * @cur: a pointer to an array of xmlChar
14595 *
14596 * parse an XML in-memory document and build a tree.
14597 *
14598 * Returns the resulting document tree
14599 */
14600
14601 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14602 xmlParseDoc(const xmlChar *cur) {
14603 return(xmlSAXParseDoc(NULL, cur, 0));
14604 }
14605 #endif /* LIBXML_SAX1_ENABLED */
14606
14607 #ifdef LIBXML_LEGACY_ENABLED
14608 /************************************************************************
14609 * *
14610 * Specific function to keep track of entities references *
14611 * and used by the XSLT debugger *
14612 * *
14613 ************************************************************************/
14614
14615 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14616
14617 /**
14618 * xmlAddEntityReference:
14619 * @ent : A valid entity
14620 * @firstNode : A valid first node for children of entity
14621 * @lastNode : A valid last node of children entity
14622 *
14623 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14624 */
14625 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14626 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14627 xmlNodePtr lastNode)
14628 {
14629 if (xmlEntityRefFunc != NULL) {
14630 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14631 }
14632 }
14633
14634
14635 /**
14636 * xmlSetEntityReferenceFunc:
14637 * @func: A valid function
14638 *
14639 * Set the function to call call back when a xml reference has been made
14640 */
14641 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14642 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14643 {
14644 xmlEntityRefFunc = func;
14645 }
14646 #endif /* LIBXML_LEGACY_ENABLED */
14647
14648 /************************************************************************
14649 * *
14650 * Miscellaneous *
14651 * *
14652 ************************************************************************/
14653
14654 #ifdef LIBXML_XPATH_ENABLED
14655 #include <libxml/xpath.h>
14656 #endif
14657
14658 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14659 static int xmlParserInitialized = 0;
14660
14661 /**
14662 * xmlInitParser:
14663 *
14664 * Initialization function for the XML parser.
14665 * This is not reentrant. Call once before processing in case of
14666 * use in multithreaded programs.
14667 */
14668
14669 void
xmlInitParser(void)14670 xmlInitParser(void) {
14671 if (xmlParserInitialized != 0)
14672 return;
14673
14674 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14675 atexit(xmlCleanupParser);
14676 #endif
14677
14678 #ifdef LIBXML_THREAD_ENABLED
14679 __xmlGlobalInitMutexLock();
14680 if (xmlParserInitialized == 0) {
14681 #endif
14682 xmlInitThreads();
14683 xmlInitGlobals();
14684 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14685 (xmlGenericError == NULL))
14686 initGenericErrorDefaultFunc(NULL);
14687 xmlInitMemory();
14688 xmlInitializeDict();
14689 xmlInitCharEncodingHandlers();
14690 xmlDefaultSAXHandlerInit();
14691 xmlRegisterDefaultInputCallbacks();
14692 #ifdef LIBXML_OUTPUT_ENABLED
14693 xmlRegisterDefaultOutputCallbacks();
14694 #endif /* LIBXML_OUTPUT_ENABLED */
14695 #ifdef LIBXML_HTML_ENABLED
14696 htmlInitAutoClose();
14697 htmlDefaultSAXHandlerInit();
14698 #endif
14699 #ifdef LIBXML_XPATH_ENABLED
14700 xmlXPathInit();
14701 #endif
14702 xmlParserInitialized = 1;
14703 #ifdef LIBXML_THREAD_ENABLED
14704 }
14705 __xmlGlobalInitMutexUnlock();
14706 #endif
14707 }
14708
14709 /**
14710 * xmlCleanupParser:
14711 *
14712 * This function name is somewhat misleading. It does not clean up
14713 * parser state, it cleans up memory allocated by the library itself.
14714 * It is a cleanup function for the XML library. It tries to reclaim all
14715 * related global memory allocated for the library processing.
14716 * It doesn't deallocate any document related memory. One should
14717 * call xmlCleanupParser() only when the process has finished using
14718 * the library and all XML/HTML documents built with it.
14719 * See also xmlInitParser() which has the opposite function of preparing
14720 * the library for operations.
14721 *
14722 * WARNING: if your application is multithreaded or has plugin support
14723 * calling this may crash the application if another thread or
14724 * a plugin is still using libxml2. It's sometimes very hard to
14725 * guess if libxml2 is in use in the application, some libraries
14726 * or plugins may use it without notice. In case of doubt abstain
14727 * from calling this function or do it just before calling exit()
14728 * to avoid leak reports from valgrind !
14729 */
14730
14731 void
xmlCleanupParser(void)14732 xmlCleanupParser(void) {
14733 if (!xmlParserInitialized)
14734 return;
14735
14736 xmlCleanupCharEncodingHandlers();
14737 #ifdef LIBXML_CATALOG_ENABLED
14738 xmlCatalogCleanup();
14739 #endif
14740 xmlDictCleanup();
14741 xmlCleanupInputCallbacks();
14742 #ifdef LIBXML_OUTPUT_ENABLED
14743 xmlCleanupOutputCallbacks();
14744 #endif
14745 #ifdef LIBXML_SCHEMAS_ENABLED
14746 xmlSchemaCleanupTypes();
14747 xmlRelaxNGCleanupTypes();
14748 #endif
14749 xmlResetLastError();
14750 xmlCleanupGlobals();
14751 xmlCleanupThreads(); /* must be last if called not from the main thread */
14752 xmlCleanupMemory();
14753 xmlParserInitialized = 0;
14754 }
14755
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor hook that runs xmlCleanupParser(), but only while the
 * library still frees memory with the C library's free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14769
14770 /************************************************************************
14771 * *
14772 * New set (2.6.0) of simpler and more flexible APIs *
14773 * *
14774 ************************************************************************/
14775
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement: write it with a trailing semicolon, and it can be
 * used safely as the unbraced branch of an if/else.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14787
14788 /**
14789 * xmlCtxtReset:
14790 * @ctxt: an XML parser context
14791 *
14792 * Reset a parser context
14793 */
14794 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14795 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14796 {
14797 xmlParserInputPtr input;
14798 xmlDictPtr dict;
14799
14800 if (ctxt == NULL)
14801 return;
14802
14803 dict = ctxt->dict;
14804
14805 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14806 xmlFreeInputStream(input);
14807 }
14808 ctxt->inputNr = 0;
14809 ctxt->input = NULL;
14810
14811 ctxt->spaceNr = 0;
14812 if (ctxt->spaceTab != NULL) {
14813 ctxt->spaceTab[0] = -1;
14814 ctxt->space = &ctxt->spaceTab[0];
14815 } else {
14816 ctxt->space = NULL;
14817 }
14818
14819
14820 ctxt->nodeNr = 0;
14821 ctxt->node = NULL;
14822
14823 ctxt->nameNr = 0;
14824 ctxt->name = NULL;
14825
14826 DICT_FREE(ctxt->version);
14827 ctxt->version = NULL;
14828 DICT_FREE(ctxt->encoding);
14829 ctxt->encoding = NULL;
14830 DICT_FREE(ctxt->directory);
14831 ctxt->directory = NULL;
14832 DICT_FREE(ctxt->extSubURI);
14833 ctxt->extSubURI = NULL;
14834 DICT_FREE(ctxt->extSubSystem);
14835 ctxt->extSubSystem = NULL;
14836 if (ctxt->myDoc != NULL)
14837 xmlFreeDoc(ctxt->myDoc);
14838 ctxt->myDoc = NULL;
14839
14840 ctxt->standalone = -1;
14841 ctxt->hasExternalSubset = 0;
14842 ctxt->hasPErefs = 0;
14843 ctxt->html = 0;
14844 ctxt->external = 0;
14845 ctxt->instate = XML_PARSER_START;
14846 ctxt->token = 0;
14847
14848 ctxt->wellFormed = 1;
14849 ctxt->nsWellFormed = 1;
14850 ctxt->disableSAX = 0;
14851 ctxt->valid = 1;
14852 #if 0
14853 ctxt->vctxt.userData = ctxt;
14854 ctxt->vctxt.error = xmlParserValidityError;
14855 ctxt->vctxt.warning = xmlParserValidityWarning;
14856 #endif
14857 ctxt->record_info = 0;
14858 ctxt->checkIndex = 0;
14859 ctxt->inSubset = 0;
14860 ctxt->errNo = XML_ERR_OK;
14861 ctxt->depth = 0;
14862 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14863 ctxt->catalogs = NULL;
14864 ctxt->nbentities = 0;
14865 ctxt->sizeentities = 0;
14866 ctxt->sizeentcopy = 0;
14867 xmlInitNodeInfoSeq(&ctxt->node_seq);
14868
14869 if (ctxt->attsDefault != NULL) {
14870 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14871 ctxt->attsDefault = NULL;
14872 }
14873 if (ctxt->attsSpecial != NULL) {
14874 xmlHashFree(ctxt->attsSpecial, NULL);
14875 ctxt->attsSpecial = NULL;
14876 }
14877
14878 #ifdef LIBXML_CATALOG_ENABLED
14879 if (ctxt->catalogs != NULL)
14880 xmlCatalogFreeLocal(ctxt->catalogs);
14881 #endif
14882 if (ctxt->lastError.code != XML_ERR_OK)
14883 xmlResetError(&ctxt->lastError);
14884 }
14885
14886 /**
14887 * xmlCtxtResetPush:
14888 * @ctxt: an XML parser context
14889 * @chunk: a pointer to an array of chars
14890 * @size: number of chars in the array
14891 * @filename: an optional file name or URI
14892 * @encoding: the document encoding, or NULL
14893 *
14894 * Reset a push parser context
14895 *
14896 * Returns 0 in case of success and 1 in case of error
14897 */
14898 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14899 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14900 int size, const char *filename, const char *encoding)
14901 {
14902 xmlParserInputPtr inputStream;
14903 xmlParserInputBufferPtr buf;
14904 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14905
14906 if (ctxt == NULL)
14907 return(1);
14908
14909 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14910 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14911
14912 buf = xmlAllocParserInputBuffer(enc);
14913 if (buf == NULL)
14914 return(1);
14915
14916 if (ctxt == NULL) {
14917 xmlFreeParserInputBuffer(buf);
14918 return(1);
14919 }
14920
14921 xmlCtxtReset(ctxt);
14922
14923 if (filename == NULL) {
14924 ctxt->directory = NULL;
14925 } else {
14926 ctxt->directory = xmlParserGetDirectory(filename);
14927 }
14928
14929 inputStream = xmlNewInputStream(ctxt);
14930 if (inputStream == NULL) {
14931 xmlFreeParserInputBuffer(buf);
14932 return(1);
14933 }
14934
14935 if (filename == NULL)
14936 inputStream->filename = NULL;
14937 else
14938 inputStream->filename = (char *)
14939 xmlCanonicPath((const xmlChar *) filename);
14940 inputStream->buf = buf;
14941 xmlBufResetInput(buf->buffer, inputStream);
14942
14943 inputPush(ctxt, inputStream);
14944
14945 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14946 (ctxt->input->buf != NULL)) {
14947 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14948 size_t cur = ctxt->input->cur - ctxt->input->base;
14949
14950 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14951
14952 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14953 #ifdef DEBUG_PUSH
14954 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14955 #endif
14956 }
14957
14958 if (encoding != NULL) {
14959 xmlCharEncodingHandlerPtr hdlr;
14960
14961 if (ctxt->encoding != NULL)
14962 xmlFree((xmlChar *) ctxt->encoding);
14963 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14964
14965 hdlr = xmlFindCharEncodingHandler(encoding);
14966 if (hdlr != NULL) {
14967 xmlSwitchToEncoding(ctxt, hdlr);
14968 } else {
14969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14970 "Unsupported encoding %s\n", BAD_CAST encoding);
14971 }
14972 } else if (enc != XML_CHAR_ENCODING_NONE) {
14973 xmlSwitchEncoding(ctxt, enc);
14974 }
14975
14976 return(0);
14977 }
14978
14979
14980 /**
14981 * xmlCtxtUseOptionsInternal:
14982 * @ctxt: an XML parser context
14983 * @options: a combination of xmlParserOption
14984 * @encoding: the user provided encoding to use
14985 *
14986 * Applies the options to the parser context
14987 *
14988 * Returns 0 in case of success, the set of unknown or unimplemented options
14989 * in case of error.
14990 */
14991 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14992 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14993 {
14994 if (ctxt == NULL)
14995 return(-1);
14996 if (encoding != NULL) {
14997 if (ctxt->encoding != NULL)
14998 xmlFree((xmlChar *) ctxt->encoding);
14999 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15000 }
15001 if (options & XML_PARSE_RECOVER) {
15002 ctxt->recovery = 1;
15003 options -= XML_PARSE_RECOVER;
15004 ctxt->options |= XML_PARSE_RECOVER;
15005 } else
15006 ctxt->recovery = 0;
15007 if (options & XML_PARSE_DTDLOAD) {
15008 ctxt->loadsubset = XML_DETECT_IDS;
15009 options -= XML_PARSE_DTDLOAD;
15010 ctxt->options |= XML_PARSE_DTDLOAD;
15011 } else
15012 ctxt->loadsubset = 0;
15013 if (options & XML_PARSE_DTDATTR) {
15014 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15015 options -= XML_PARSE_DTDATTR;
15016 ctxt->options |= XML_PARSE_DTDATTR;
15017 }
15018 if (options & XML_PARSE_NOENT) {
15019 ctxt->replaceEntities = 1;
15020 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15021 options -= XML_PARSE_NOENT;
15022 ctxt->options |= XML_PARSE_NOENT;
15023 } else
15024 ctxt->replaceEntities = 0;
15025 if (options & XML_PARSE_PEDANTIC) {
15026 ctxt->pedantic = 1;
15027 options -= XML_PARSE_PEDANTIC;
15028 ctxt->options |= XML_PARSE_PEDANTIC;
15029 } else
15030 ctxt->pedantic = 0;
15031 if (options & XML_PARSE_NOBLANKS) {
15032 ctxt->keepBlanks = 0;
15033 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15034 options -= XML_PARSE_NOBLANKS;
15035 ctxt->options |= XML_PARSE_NOBLANKS;
15036 } else
15037 ctxt->keepBlanks = 1;
15038 if (options & XML_PARSE_DTDVALID) {
15039 ctxt->validate = 1;
15040 if (options & XML_PARSE_NOWARNING)
15041 ctxt->vctxt.warning = NULL;
15042 if (options & XML_PARSE_NOERROR)
15043 ctxt->vctxt.error = NULL;
15044 options -= XML_PARSE_DTDVALID;
15045 ctxt->options |= XML_PARSE_DTDVALID;
15046 } else
15047 ctxt->validate = 0;
15048 if (options & XML_PARSE_NOWARNING) {
15049 ctxt->sax->warning = NULL;
15050 options -= XML_PARSE_NOWARNING;
15051 }
15052 if (options & XML_PARSE_NOERROR) {
15053 ctxt->sax->error = NULL;
15054 ctxt->sax->fatalError = NULL;
15055 options -= XML_PARSE_NOERROR;
15056 }
15057 #ifdef LIBXML_SAX1_ENABLED
15058 if (options & XML_PARSE_SAX1) {
15059 ctxt->sax->startElement = xmlSAX2StartElement;
15060 ctxt->sax->endElement = xmlSAX2EndElement;
15061 ctxt->sax->startElementNs = NULL;
15062 ctxt->sax->endElementNs = NULL;
15063 ctxt->sax->initialized = 1;
15064 options -= XML_PARSE_SAX1;
15065 ctxt->options |= XML_PARSE_SAX1;
15066 }
15067 #endif /* LIBXML_SAX1_ENABLED */
15068 if (options & XML_PARSE_NODICT) {
15069 ctxt->dictNames = 0;
15070 options -= XML_PARSE_NODICT;
15071 ctxt->options |= XML_PARSE_NODICT;
15072 } else {
15073 ctxt->dictNames = 1;
15074 }
15075 if (options & XML_PARSE_NOCDATA) {
15076 ctxt->sax->cdataBlock = NULL;
15077 options -= XML_PARSE_NOCDATA;
15078 ctxt->options |= XML_PARSE_NOCDATA;
15079 }
15080 if (options & XML_PARSE_NSCLEAN) {
15081 ctxt->options |= XML_PARSE_NSCLEAN;
15082 options -= XML_PARSE_NSCLEAN;
15083 }
15084 if (options & XML_PARSE_NONET) {
15085 ctxt->options |= XML_PARSE_NONET;
15086 options -= XML_PARSE_NONET;
15087 }
15088 if (options & XML_PARSE_COMPACT) {
15089 ctxt->options |= XML_PARSE_COMPACT;
15090 options -= XML_PARSE_COMPACT;
15091 }
15092 if (options & XML_PARSE_OLD10) {
15093 ctxt->options |= XML_PARSE_OLD10;
15094 options -= XML_PARSE_OLD10;
15095 }
15096 if (options & XML_PARSE_NOBASEFIX) {
15097 ctxt->options |= XML_PARSE_NOBASEFIX;
15098 options -= XML_PARSE_NOBASEFIX;
15099 }
15100 if (options & XML_PARSE_HUGE) {
15101 ctxt->options |= XML_PARSE_HUGE;
15102 options -= XML_PARSE_HUGE;
15103 if (ctxt->dict != NULL)
15104 xmlDictSetLimit(ctxt->dict, 0);
15105 }
15106 if (options & XML_PARSE_OLDSAX) {
15107 ctxt->options |= XML_PARSE_OLDSAX;
15108 options -= XML_PARSE_OLDSAX;
15109 }
15110 if (options & XML_PARSE_IGNORE_ENC) {
15111 ctxt->options |= XML_PARSE_IGNORE_ENC;
15112 options -= XML_PARSE_IGNORE_ENC;
15113 }
15114 if (options & XML_PARSE_BIG_LINES) {
15115 ctxt->options |= XML_PARSE_BIG_LINES;
15116 options -= XML_PARSE_BIG_LINES;
15117 }
15118 ctxt->linenumbers = 1;
15119 return (options);
15120 }
15121
15122 /**
15123 * xmlCtxtUseOptions:
15124 * @ctxt: an XML parser context
15125 * @options: a combination of xmlParserOption
15126 *
15127 * Applies the options to the parser context
15128 *
15129 * Returns 0 in case of success, the set of unknown or unimplemented options
15130 * in case of error.
15131 */
15132 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15133 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15134 {
15135 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15136 }
15137
15138 /**
15139 * xmlDoRead:
15140 * @ctxt: an XML parser context
15141 * @URL: the base URL to use for the document
15142 * @encoding: the document encoding, or NULL
15143 * @options: a combination of xmlParserOption
15144 * @reuse: keep the context for reuse
15145 *
15146 * Common front-end for the xmlRead functions
15147 *
15148 * Returns the resulting document tree or NULL
15149 */
15150 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15151 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15152 int options, int reuse)
15153 {
15154 xmlDocPtr ret;
15155
15156 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15157 if (encoding != NULL) {
15158 xmlCharEncodingHandlerPtr hdlr;
15159
15160 hdlr = xmlFindCharEncodingHandler(encoding);
15161 if (hdlr != NULL)
15162 xmlSwitchToEncoding(ctxt, hdlr);
15163 }
15164 if ((URL != NULL) && (ctxt->input != NULL) &&
15165 (ctxt->input->filename == NULL))
15166 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15167 xmlParseDocument(ctxt);
15168 if ((ctxt->wellFormed) || ctxt->recovery)
15169 ret = ctxt->myDoc;
15170 else {
15171 ret = NULL;
15172 if (ctxt->myDoc != NULL) {
15173 xmlFreeDoc(ctxt->myDoc);
15174 }
15175 }
15176 ctxt->myDoc = NULL;
15177 if (!reuse) {
15178 xmlFreeParserCtxt(ctxt);
15179 }
15180
15181 return (ret);
15182 }
15183
15184 /**
15185 * xmlReadDoc:
15186 * @cur: a pointer to a zero terminated string
15187 * @URL: the base URL to use for the document
15188 * @encoding: the document encoding, or NULL
15189 * @options: a combination of xmlParserOption
15190 *
15191 * parse an XML in-memory document and build a tree.
15192 *
15193 * Returns the resulting document tree
15194 */
15195 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15196 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15197 {
15198 xmlParserCtxtPtr ctxt;
15199
15200 if (cur == NULL)
15201 return (NULL);
15202 xmlInitParser();
15203
15204 ctxt = xmlCreateDocParserCtxt(cur);
15205 if (ctxt == NULL)
15206 return (NULL);
15207 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15208 }
15209
15210 /**
15211 * xmlReadFile:
15212 * @filename: a file or URL
15213 * @encoding: the document encoding, or NULL
15214 * @options: a combination of xmlParserOption
15215 *
15216 * parse an XML file from the filesystem or the network.
15217 *
15218 * Returns the resulting document tree
15219 */
15220 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15221 xmlReadFile(const char *filename, const char *encoding, int options)
15222 {
15223 xmlParserCtxtPtr ctxt;
15224
15225 xmlInitParser();
15226 ctxt = xmlCreateURLParserCtxt(filename, options);
15227 if (ctxt == NULL)
15228 return (NULL);
15229 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15230 }
15231
15232 /**
15233 * xmlReadMemory:
15234 * @buffer: a pointer to a char array
15235 * @size: the size of the array
15236 * @URL: the base URL to use for the document
15237 * @encoding: the document encoding, or NULL
15238 * @options: a combination of xmlParserOption
15239 *
15240 * parse an XML in-memory document and build a tree.
15241 *
15242 * Returns the resulting document tree
15243 */
15244 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15245 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15246 {
15247 xmlParserCtxtPtr ctxt;
15248
15249 xmlInitParser();
15250 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15251 if (ctxt == NULL)
15252 return (NULL);
15253 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15254 }
15255
15256 /**
15257 * xmlReadFd:
15258 * @fd: an open file descriptor
15259 * @URL: the base URL to use for the document
15260 * @encoding: the document encoding, or NULL
15261 * @options: a combination of xmlParserOption
15262 *
15263 * parse an XML from a file descriptor and build a tree.
15264 * NOTE that the file descriptor will not be closed when the
15265 * reader is closed or reset.
15266 *
15267 * Returns the resulting document tree
15268 */
15269 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15270 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15271 {
15272 xmlParserCtxtPtr ctxt;
15273 xmlParserInputBufferPtr input;
15274 xmlParserInputPtr stream;
15275
15276 if (fd < 0)
15277 return (NULL);
15278 xmlInitParser();
15279
15280 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15281 if (input == NULL)
15282 return (NULL);
15283 input->closecallback = NULL;
15284 ctxt = xmlNewParserCtxt();
15285 if (ctxt == NULL) {
15286 xmlFreeParserInputBuffer(input);
15287 return (NULL);
15288 }
15289 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15290 if (stream == NULL) {
15291 xmlFreeParserInputBuffer(input);
15292 xmlFreeParserCtxt(ctxt);
15293 return (NULL);
15294 }
15295 inputPush(ctxt, stream);
15296 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15297 }
15298
15299 /**
15300 * xmlReadIO:
15301 * @ioread: an I/O read function
15302 * @ioclose: an I/O close function
15303 * @ioctx: an I/O handler
15304 * @URL: the base URL to use for the document
15305 * @encoding: the document encoding, or NULL
15306 * @options: a combination of xmlParserOption
15307 *
15308 * parse an XML document from I/O functions and source and build a tree.
15309 *
15310 * Returns the resulting document tree
15311 */
15312 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15313 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15314 void *ioctx, const char *URL, const char *encoding, int options)
15315 {
15316 xmlParserCtxtPtr ctxt;
15317 xmlParserInputBufferPtr input;
15318 xmlParserInputPtr stream;
15319
15320 if (ioread == NULL)
15321 return (NULL);
15322 xmlInitParser();
15323
15324 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15325 XML_CHAR_ENCODING_NONE);
15326 if (input == NULL) {
15327 if (ioclose != NULL)
15328 ioclose(ioctx);
15329 return (NULL);
15330 }
15331 ctxt = xmlNewParserCtxt();
15332 if (ctxt == NULL) {
15333 xmlFreeParserInputBuffer(input);
15334 return (NULL);
15335 }
15336 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15337 if (stream == NULL) {
15338 xmlFreeParserInputBuffer(input);
15339 xmlFreeParserCtxt(ctxt);
15340 return (NULL);
15341 }
15342 inputPush(ctxt, stream);
15343 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15344 }
15345
15346 /**
15347 * xmlCtxtReadDoc:
15348 * @ctxt: an XML parser context
15349 * @cur: a pointer to a zero terminated string
15350 * @URL: the base URL to use for the document
15351 * @encoding: the document encoding, or NULL
15352 * @options: a combination of xmlParserOption
15353 *
15354 * parse an XML in-memory document and build a tree.
15355 * This reuses the existing @ctxt parser context
15356 *
15357 * Returns the resulting document tree
15358 */
15359 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15360 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15361 const char *URL, const char *encoding, int options)
15362 {
15363 xmlParserInputPtr stream;
15364
15365 if (cur == NULL)
15366 return (NULL);
15367 if (ctxt == NULL)
15368 return (NULL);
15369 xmlInitParser();
15370
15371 xmlCtxtReset(ctxt);
15372
15373 stream = xmlNewStringInputStream(ctxt, cur);
15374 if (stream == NULL) {
15375 return (NULL);
15376 }
15377 inputPush(ctxt, stream);
15378 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15379 }
15380
15381 /**
15382 * xmlCtxtReadFile:
15383 * @ctxt: an XML parser context
15384 * @filename: a file or URL
15385 * @encoding: the document encoding, or NULL
15386 * @options: a combination of xmlParserOption
15387 *
15388 * parse an XML file from the filesystem or the network.
15389 * This reuses the existing @ctxt parser context
15390 *
15391 * Returns the resulting document tree
15392 */
15393 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15394 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15395 const char *encoding, int options)
15396 {
15397 xmlParserInputPtr stream;
15398
15399 if (filename == NULL)
15400 return (NULL);
15401 if (ctxt == NULL)
15402 return (NULL);
15403 xmlInitParser();
15404
15405 xmlCtxtReset(ctxt);
15406
15407 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15408 if (stream == NULL) {
15409 return (NULL);
15410 }
15411 inputPush(ctxt, stream);
15412 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15413 }
15414
15415 /**
15416 * xmlCtxtReadMemory:
15417 * @ctxt: an XML parser context
15418 * @buffer: a pointer to a char array
15419 * @size: the size of the array
15420 * @URL: the base URL to use for the document
15421 * @encoding: the document encoding, or NULL
15422 * @options: a combination of xmlParserOption
15423 *
15424 * parse an XML in-memory document and build a tree.
15425 * This reuses the existing @ctxt parser context
15426 *
15427 * Returns the resulting document tree
15428 */
15429 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15430 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15431 const char *URL, const char *encoding, int options)
15432 {
15433 xmlParserInputBufferPtr input;
15434 xmlParserInputPtr stream;
15435
15436 if (ctxt == NULL)
15437 return (NULL);
15438 if (buffer == NULL)
15439 return (NULL);
15440 xmlInitParser();
15441
15442 xmlCtxtReset(ctxt);
15443
15444 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15445 if (input == NULL) {
15446 return(NULL);
15447 }
15448
15449 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15450 if (stream == NULL) {
15451 xmlFreeParserInputBuffer(input);
15452 return(NULL);
15453 }
15454
15455 inputPush(ctxt, stream);
15456 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15457 }
15458
15459 /**
15460 * xmlCtxtReadFd:
15461 * @ctxt: an XML parser context
15462 * @fd: an open file descriptor
15463 * @URL: the base URL to use for the document
15464 * @encoding: the document encoding, or NULL
15465 * @options: a combination of xmlParserOption
15466 *
15467 * parse an XML from a file descriptor and build a tree.
15468 * This reuses the existing @ctxt parser context
15469 * NOTE that the file descriptor will not be closed when the
15470 * reader is closed or reset.
15471 *
15472 * Returns the resulting document tree
15473 */
15474 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15475 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15476 const char *URL, const char *encoding, int options)
15477 {
15478 xmlParserInputBufferPtr input;
15479 xmlParserInputPtr stream;
15480
15481 if (fd < 0)
15482 return (NULL);
15483 if (ctxt == NULL)
15484 return (NULL);
15485 xmlInitParser();
15486
15487 xmlCtxtReset(ctxt);
15488
15489
15490 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15491 if (input == NULL)
15492 return (NULL);
15493 input->closecallback = NULL;
15494 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15495 if (stream == NULL) {
15496 xmlFreeParserInputBuffer(input);
15497 return (NULL);
15498 }
15499 inputPush(ctxt, stream);
15500 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15501 }
15502
15503 /**
15504 * xmlCtxtReadIO:
15505 * @ctxt: an XML parser context
15506 * @ioread: an I/O read function
15507 * @ioclose: an I/O close function
15508 * @ioctx: an I/O handler
15509 * @URL: the base URL to use for the document
15510 * @encoding: the document encoding, or NULL
15511 * @options: a combination of xmlParserOption
15512 *
15513 * parse an XML document from I/O functions and source and build a tree.
15514 * This reuses the existing @ctxt parser context
15515 *
15516 * Returns the resulting document tree
15517 */
15518 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15519 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15520 xmlInputCloseCallback ioclose, void *ioctx,
15521 const char *URL,
15522 const char *encoding, int options)
15523 {
15524 xmlParserInputBufferPtr input;
15525 xmlParserInputPtr stream;
15526
15527 if (ioread == NULL)
15528 return (NULL);
15529 if (ctxt == NULL)
15530 return (NULL);
15531 xmlInitParser();
15532
15533 xmlCtxtReset(ctxt);
15534
15535 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15536 XML_CHAR_ENCODING_NONE);
15537 if (input == NULL) {
15538 if (ioclose != NULL)
15539 ioclose(ioctx);
15540 return (NULL);
15541 }
15542 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15543 if (stream == NULL) {
15544 xmlFreeParserInputBuffer(input);
15545 return (NULL);
15546 }
15547 inputPush(ctxt, stream);
15548 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15549 }
15550
15551 #define bottom_parser
15552 #include "elfgcchack.h"
15553