1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
99 /************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
/*
 * Entity size thresholds. XML_PARSER_BIG_ENTITY is the replacement size
 * below which xmlParserEntityCheck() accepts an entity without looking
 * any further.
 */
#define XML_PARSER_BIG_ENTITY   1000
#define XML_PARSER_LOT_ENTITY   5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR   10
115
116 /*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
128 {
129 size_t consumed = 0;
130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0)) {
142 unsigned long oldnbent = ctxt->nbentities;
143 xmlChar *rep;
144
145 ent->checked = 1;
146
147 rep = xmlStringDecodeEntities(ctxt, ent->content,
148 XML_SUBSTITUTE_REF, 0, 0, 0);
149
150 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
151 if (rep != NULL) {
152 if (xmlStrchr(rep, '<'))
153 ent->checked |= 1;
154 xmlFree(rep);
155 rep = NULL;
156 }
157 }
158 if (replacement != 0) {
159 if (replacement < XML_MAX_TEXT_LENGTH)
160 return(0);
161
162 /*
163 * If the volume of entity copy reaches 10 times the
164 * amount of parsed data and over the large text threshold
165 * then that's very likely to be an abuse.
166 */
167 if (ctxt->input != NULL) {
168 consumed = ctxt->input->consumed +
169 (ctxt->input->cur - ctxt->input->base);
170 }
171 consumed += ctxt->sizeentities;
172
173 if (replacement < XML_PARSER_NON_LINEAR * consumed)
174 return(0);
175 } else if (size != 0) {
176 /*
177 * Do the check based on the replacement size of the entity
178 */
179 if (size < XML_PARSER_BIG_ENTITY)
180 return(0);
181
182 /*
183 * A limit on the amount of text data reasonably used
184 */
185 if (ctxt->input != NULL) {
186 consumed = ctxt->input->consumed +
187 (ctxt->input->cur - ctxt->input->base);
188 }
189 consumed += ctxt->sizeentities;
190
191 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
192 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
193 return (0);
194 } else if (ent != NULL) {
195 /*
196 * use the number of parsed entities in the replacement
197 */
198 size = ent->checked / 2;
199
200 /*
201 * The amount of data parsed counting entities size only once
202 */
203 if (ctxt->input != NULL) {
204 consumed = ctxt->input->consumed +
205 (ctxt->input->cur - ctxt->input->base);
206 }
207 consumed += ctxt->sizeentities;
208
209 /*
210 * Check the density of entities for the amount of data
211 * knowing an entity reference will take at least 3 bytes
212 */
213 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
214 return (0);
215 } else {
216 /*
217 * strange we got no data for checking
218 */
219 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
220 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
221 (ctxt->nbentities <= 10000))
222 return (0);
223 }
224 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
225 return (1);
226 }
227
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;



/* Miscellaneous parser constants. */
#define SAX2                        1
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the characters available in the input are followed by a 0, which
 * should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE       100
255
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
265
266
267 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
268 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
269 const xmlChar **str);
270
271 static xmlParserErrors
272 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
273 xmlSAXHandlerPtr sax,
274 void *user_data, int depth, const xmlChar *URL,
275 const xmlChar *ID, xmlNodePtr *list);
276
277 static int
278 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
279 const char *encoding);
280 #ifdef LIBXML_LEGACY_ENABLED
281 static void
282 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
283 xmlNodePtr lastNode);
284 #endif /* LIBXML_LEGACY_ENABLED */
285
286 static xmlParserErrors
287 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
288 const xmlChar *string, void *user_data, xmlNodePtr *lst);
289
290 static int
291 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
292
293 /************************************************************************
294 * *
295 * Some factorized error routines *
296 * *
297 ************************************************************************/
298
299 /**
300 * xmlErrAttributeDup:
301 * @ctxt: an XML parser context
302 * @prefix: the attribute prefix
303 * @localname: the attribute localname
304 *
305 * Handle a redefinition of attribute error
306 */
307 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)308 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
309 const xmlChar * localname)
310 {
311 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312 (ctxt->instate == XML_PARSER_EOF))
313 return;
314 if (ctxt != NULL)
315 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
316
317 if (prefix == NULL)
318 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
319 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
320 (const char *) localname, NULL, NULL, 0, 0,
321 "Attribute %s redefined\n", localname);
322 else
323 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
324 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
325 (const char *) prefix, (const char *) localname,
326 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
327 localname);
328 if (ctxt != NULL) {
329 ctxt->wellFormed = 0;
330 if (ctxt->recovery == 0)
331 ctxt->disableSAX = 1;
332 }
333 }
334
335 /**
336 * xmlFatalErr:
337 * @ctxt: an XML parser context
338 * @error: the error number
339 * @extra: extra information string
340 *
341 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
342 */
343 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)344 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
345 {
346 const char *errmsg;
347 char errstr[129] = "";
348
349 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350 (ctxt->instate == XML_PARSER_EOF))
351 return;
352 switch (error) {
353 case XML_ERR_INVALID_HEX_CHARREF:
354 errmsg = "CharRef: invalid hexadecimal value";
355 break;
356 case XML_ERR_INVALID_DEC_CHARREF:
357 errmsg = "CharRef: invalid decimal value";
358 break;
359 case XML_ERR_INVALID_CHARREF:
360 errmsg = "CharRef: invalid value";
361 break;
362 case XML_ERR_INTERNAL_ERROR:
363 errmsg = "internal error";
364 break;
365 case XML_ERR_PEREF_AT_EOF:
366 errmsg = "PEReference at end of document";
367 break;
368 case XML_ERR_PEREF_IN_PROLOG:
369 errmsg = "PEReference in prolog";
370 break;
371 case XML_ERR_PEREF_IN_EPILOG:
372 errmsg = "PEReference in epilog";
373 break;
374 case XML_ERR_PEREF_NO_NAME:
375 errmsg = "PEReference: no name";
376 break;
377 case XML_ERR_PEREF_SEMICOL_MISSING:
378 errmsg = "PEReference: expecting ';'";
379 break;
380 case XML_ERR_ENTITY_LOOP:
381 errmsg = "Detected an entity reference loop";
382 break;
383 case XML_ERR_ENTITY_NOT_STARTED:
384 errmsg = "EntityValue: \" or ' expected";
385 break;
386 case XML_ERR_ENTITY_PE_INTERNAL:
387 errmsg = "PEReferences forbidden in internal subset";
388 break;
389 case XML_ERR_ENTITY_NOT_FINISHED:
390 errmsg = "EntityValue: \" or ' expected";
391 break;
392 case XML_ERR_ATTRIBUTE_NOT_STARTED:
393 errmsg = "AttValue: \" or ' expected";
394 break;
395 case XML_ERR_LT_IN_ATTRIBUTE:
396 errmsg = "Unescaped '<' not allowed in attributes values";
397 break;
398 case XML_ERR_LITERAL_NOT_STARTED:
399 errmsg = "SystemLiteral \" or ' expected";
400 break;
401 case XML_ERR_LITERAL_NOT_FINISHED:
402 errmsg = "Unfinished System or Public ID \" or ' expected";
403 break;
404 case XML_ERR_MISPLACED_CDATA_END:
405 errmsg = "Sequence ']]>' not allowed in content";
406 break;
407 case XML_ERR_URI_REQUIRED:
408 errmsg = "SYSTEM or PUBLIC, the URI is missing";
409 break;
410 case XML_ERR_PUBID_REQUIRED:
411 errmsg = "PUBLIC, the Public Identifier is missing";
412 break;
413 case XML_ERR_HYPHEN_IN_COMMENT:
414 errmsg = "Comment must not contain '--' (double-hyphen)";
415 break;
416 case XML_ERR_PI_NOT_STARTED:
417 errmsg = "xmlParsePI : no target name";
418 break;
419 case XML_ERR_RESERVED_XML_NAME:
420 errmsg = "Invalid PI name";
421 break;
422 case XML_ERR_NOTATION_NOT_STARTED:
423 errmsg = "NOTATION: Name expected here";
424 break;
425 case XML_ERR_NOTATION_NOT_FINISHED:
426 errmsg = "'>' required to close NOTATION declaration";
427 break;
428 case XML_ERR_VALUE_REQUIRED:
429 errmsg = "Entity value required";
430 break;
431 case XML_ERR_URI_FRAGMENT:
432 errmsg = "Fragment not allowed";
433 break;
434 case XML_ERR_ATTLIST_NOT_STARTED:
435 errmsg = "'(' required to start ATTLIST enumeration";
436 break;
437 case XML_ERR_NMTOKEN_REQUIRED:
438 errmsg = "NmToken expected in ATTLIST enumeration";
439 break;
440 case XML_ERR_ATTLIST_NOT_FINISHED:
441 errmsg = "')' required to finish ATTLIST enumeration";
442 break;
443 case XML_ERR_MIXED_NOT_STARTED:
444 errmsg = "MixedContentDecl : '|' or ')*' expected";
445 break;
446 case XML_ERR_PCDATA_REQUIRED:
447 errmsg = "MixedContentDecl : '#PCDATA' expected";
448 break;
449 case XML_ERR_ELEMCONTENT_NOT_STARTED:
450 errmsg = "ContentDecl : Name or '(' expected";
451 break;
452 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
453 errmsg = "ContentDecl : ',' '|' or ')' expected";
454 break;
455 case XML_ERR_PEREF_IN_INT_SUBSET:
456 errmsg =
457 "PEReference: forbidden within markup decl in internal subset";
458 break;
459 case XML_ERR_GT_REQUIRED:
460 errmsg = "expected '>'";
461 break;
462 case XML_ERR_CONDSEC_INVALID:
463 errmsg = "XML conditional section '[' expected";
464 break;
465 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
466 errmsg = "Content error in the external subset";
467 break;
468 case XML_ERR_CONDSEC_INVALID_KEYWORD:
469 errmsg =
470 "conditional section INCLUDE or IGNORE keyword expected";
471 break;
472 case XML_ERR_CONDSEC_NOT_FINISHED:
473 errmsg = "XML conditional section not closed";
474 break;
475 case XML_ERR_XMLDECL_NOT_STARTED:
476 errmsg = "Text declaration '<?xml' required";
477 break;
478 case XML_ERR_XMLDECL_NOT_FINISHED:
479 errmsg = "parsing XML declaration: '?>' expected";
480 break;
481 case XML_ERR_EXT_ENTITY_STANDALONE:
482 errmsg = "external parsed entities cannot be standalone";
483 break;
484 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
485 errmsg = "EntityRef: expecting ';'";
486 break;
487 case XML_ERR_DOCTYPE_NOT_FINISHED:
488 errmsg = "DOCTYPE improperly terminated";
489 break;
490 case XML_ERR_LTSLASH_REQUIRED:
491 errmsg = "EndTag: '</' not found";
492 break;
493 case XML_ERR_EQUAL_REQUIRED:
494 errmsg = "expected '='";
495 break;
496 case XML_ERR_STRING_NOT_CLOSED:
497 errmsg = "String not closed expecting \" or '";
498 break;
499 case XML_ERR_STRING_NOT_STARTED:
500 errmsg = "String not started expecting ' or \"";
501 break;
502 case XML_ERR_ENCODING_NAME:
503 errmsg = "Invalid XML encoding name";
504 break;
505 case XML_ERR_STANDALONE_VALUE:
506 errmsg = "standalone accepts only 'yes' or 'no'";
507 break;
508 case XML_ERR_DOCUMENT_EMPTY:
509 errmsg = "Document is empty";
510 break;
511 case XML_ERR_DOCUMENT_END:
512 errmsg = "Extra content at the end of the document";
513 break;
514 case XML_ERR_NOT_WELL_BALANCED:
515 errmsg = "chunk is not well balanced";
516 break;
517 case XML_ERR_EXTRA_CONTENT:
518 errmsg = "extra content at the end of well balanced chunk";
519 break;
520 case XML_ERR_VERSION_MISSING:
521 errmsg = "Malformed declaration expecting version";
522 break;
523 case XML_ERR_NAME_TOO_LONG:
524 errmsg = "Name too long use XML_PARSE_HUGE option";
525 break;
526 #if 0
527 case:
528 errmsg = "";
529 break;
530 #endif
531 default:
532 errmsg = "Unregistered error message";
533 }
534 if (info == NULL)
535 snprintf(errstr, 128, "%s\n", errmsg);
536 else
537 snprintf(errstr, 128, "%s: %%s\n", errmsg);
538 if (ctxt != NULL)
539 ctxt->errNo = error;
540 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
541 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
542 info);
543 if (ctxt != NULL) {
544 ctxt->wellFormed = 0;
545 if (ctxt->recovery == 0)
546 ctxt->disableSAX = 1;
547 }
548 }
549
550 /**
551 * xmlFatalErrMsg:
552 * @ctxt: an XML parser context
553 * @error: the error number
554 * @msg: the error message
555 *
556 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
557 */
558 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)559 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg)
561 {
562 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
563 (ctxt->instate == XML_PARSER_EOF))
564 return;
565 if (ctxt != NULL)
566 ctxt->errNo = error;
567 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
568 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
569 if (ctxt != NULL) {
570 ctxt->wellFormed = 0;
571 if (ctxt->recovery == 0)
572 ctxt->disableSAX = 1;
573 }
574 }
575
576 /**
577 * xmlWarningMsg:
578 * @ctxt: an XML parser context
579 * @error: the error number
580 * @msg: the error message
581 * @str1: extra data
582 * @str2: extra data
583 *
584 * Handle a warning.
585 */
586 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)587 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
588 const char *msg, const xmlChar *str1, const xmlChar *str2)
589 {
590 xmlStructuredErrorFunc schannel = NULL;
591
592 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
593 (ctxt->instate == XML_PARSER_EOF))
594 return;
595 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
596 (ctxt->sax->initialized == XML_SAX2_MAGIC))
597 schannel = ctxt->sax->serror;
598 if (ctxt != NULL) {
599 __xmlRaiseError(schannel,
600 (ctxt->sax) ? ctxt->sax->warning : NULL,
601 ctxt->userData,
602 ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_WARNING, NULL, 0,
604 (const char *) str1, (const char *) str2, NULL, 0, 0,
605 msg, (const char *) str1, (const char *) str2);
606 } else {
607 __xmlRaiseError(schannel, NULL, NULL,
608 ctxt, NULL, XML_FROM_PARSER, error,
609 XML_ERR_WARNING, NULL, 0,
610 (const char *) str1, (const char *) str2, NULL, 0, 0,
611 msg, (const char *) str1, (const char *) str2);
612 }
613 }
614
615 /**
616 * xmlValidityError:
617 * @ctxt: an XML parser context
618 * @error: the error number
619 * @msg: the error message
620 * @str1: extra data
621 *
622 * Handle a validity error.
623 */
624 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)625 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
626 const char *msg, const xmlChar *str1, const xmlChar *str2)
627 {
628 xmlStructuredErrorFunc schannel = NULL;
629
630 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
631 (ctxt->instate == XML_PARSER_EOF))
632 return;
633 if (ctxt != NULL) {
634 ctxt->errNo = error;
635 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
636 schannel = ctxt->sax->serror;
637 }
638 if (ctxt != NULL) {
639 __xmlRaiseError(schannel,
640 ctxt->vctxt.error, ctxt->vctxt.userData,
641 ctxt, NULL, XML_FROM_DTD, error,
642 XML_ERR_ERROR, NULL, 0, (const char *) str1,
643 (const char *) str2, NULL, 0, 0,
644 msg, (const char *) str1, (const char *) str2);
645 ctxt->valid = 0;
646 } else {
647 __xmlRaiseError(schannel, NULL, NULL,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 }
653 }
654
655 /**
656 * xmlFatalErrMsgInt:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @val: an integer value
661 *
662 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
663 */
664 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)665 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
666 const char *msg, int val)
667 {
668 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
669 (ctxt->instate == XML_PARSER_EOF))
670 return;
671 if (ctxt != NULL)
672 ctxt->errNo = error;
673 __xmlRaiseError(NULL, NULL, NULL,
674 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
675 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
676 if (ctxt != NULL) {
677 ctxt->wellFormed = 0;
678 if (ctxt->recovery == 0)
679 ctxt->disableSAX = 1;
680 }
681 }
682
683 /**
684 * xmlFatalErrMsgStrIntStr:
685 * @ctxt: an XML parser context
686 * @error: the error number
687 * @msg: the error message
688 * @str1: an string info
689 * @val: an integer value
690 * @str2: an string info
691 *
692 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
693 */
694 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)695 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
696 const char *msg, const xmlChar *str1, int val,
697 const xmlChar *str2)
698 {
699 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
700 (ctxt->instate == XML_PARSER_EOF))
701 return;
702 if (ctxt != NULL)
703 ctxt->errNo = error;
704 __xmlRaiseError(NULL, NULL, NULL,
705 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
706 NULL, 0, (const char *) str1, (const char *) str2,
707 NULL, val, 0, msg, str1, val, str2);
708 if (ctxt != NULL) {
709 ctxt->wellFormed = 0;
710 if (ctxt->recovery == 0)
711 ctxt->disableSAX = 1;
712 }
713 }
714
715 /**
716 * xmlFatalErrMsgStr:
717 * @ctxt: an XML parser context
718 * @error: the error number
719 * @msg: the error message
720 * @val: a string value
721 *
722 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
723 */
724 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)725 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
726 const char *msg, const xmlChar * val)
727 {
728 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
729 (ctxt->instate == XML_PARSER_EOF))
730 return;
731 if (ctxt != NULL)
732 ctxt->errNo = error;
733 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
734 XML_FROM_PARSER, error, XML_ERR_FATAL,
735 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
736 val);
737 if (ctxt != NULL) {
738 ctxt->wellFormed = 0;
739 if (ctxt->recovery == 0)
740 ctxt->disableSAX = 1;
741 }
742 }
743
744 /**
745 * xmlErrMsgStr:
746 * @ctxt: an XML parser context
747 * @error: the error number
748 * @msg: the error message
749 * @val: a string value
750 *
751 * Handle a non fatal parser error
752 */
753 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)754 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
755 const char *msg, const xmlChar * val)
756 {
757 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
758 (ctxt->instate == XML_PARSER_EOF))
759 return;
760 if (ctxt != NULL)
761 ctxt->errNo = error;
762 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
763 XML_FROM_PARSER, error, XML_ERR_ERROR,
764 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
765 val);
766 }
767
768 /**
769 * xmlNsErr:
770 * @ctxt: an XML parser context
771 * @error: the error number
772 * @msg: the message
773 * @info1: extra information string
774 * @info2: extra information string
775 *
776 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
777 */
778 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)779 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
780 const char *msg,
781 const xmlChar * info1, const xmlChar * info2,
782 const xmlChar * info3)
783 {
784 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
785 (ctxt->instate == XML_PARSER_EOF))
786 return;
787 if (ctxt != NULL)
788 ctxt->errNo = error;
789 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
790 XML_ERR_ERROR, NULL, 0, (const char *) info1,
791 (const char *) info2, (const char *) info3, 0, 0, msg,
792 info1, info2, info3);
793 if (ctxt != NULL)
794 ctxt->nsWellFormed = 0;
795 }
796
797 /**
798 * xmlNsWarn
799 * @ctxt: an XML parser context
800 * @error: the error number
801 * @msg: the message
802 * @info1: extra information string
803 * @info2: extra information string
804 *
805 * Handle a namespace warning error
806 */
807 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)808 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
809 const char *msg,
810 const xmlChar * info1, const xmlChar * info2,
811 const xmlChar * info3)
812 {
813 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
814 (ctxt->instate == XML_PARSER_EOF))
815 return;
816 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
817 XML_ERR_WARNING, NULL, 0, (const char *) info1,
818 (const char *) info2, (const char *) info3, 0, 0, msg,
819 info1, info2, info3);
820 }
821
822 /************************************************************************
823 * *
824 * Library wide options *
825 * *
826 ************************************************************************/
827
828 /**
829 * xmlHasFeature:
830 * @feature: the feature to be examined
831 *
832 * Examines if the library has been compiled with a given feature.
833 *
834 * Returns a non-zero value if the feature exist, otherwise zero.
835 * Returns zero (0) if the feature does not exist or an unknown
836 * unknown feature is requested, non-zero otherwise.
837 */
838 int
xmlHasFeature(xmlFeature feature)839 xmlHasFeature(xmlFeature feature)
840 {
841 switch (feature) {
842 case XML_WITH_THREAD:
843 #ifdef LIBXML_THREAD_ENABLED
844 return(1);
845 #else
846 return(0);
847 #endif
848 case XML_WITH_TREE:
849 #ifdef LIBXML_TREE_ENABLED
850 return(1);
851 #else
852 return(0);
853 #endif
854 case XML_WITH_OUTPUT:
855 #ifdef LIBXML_OUTPUT_ENABLED
856 return(1);
857 #else
858 return(0);
859 #endif
860 case XML_WITH_PUSH:
861 #ifdef LIBXML_PUSH_ENABLED
862 return(1);
863 #else
864 return(0);
865 #endif
866 case XML_WITH_READER:
867 #ifdef LIBXML_READER_ENABLED
868 return(1);
869 #else
870 return(0);
871 #endif
872 case XML_WITH_PATTERN:
873 #ifdef LIBXML_PATTERN_ENABLED
874 return(1);
875 #else
876 return(0);
877 #endif
878 case XML_WITH_WRITER:
879 #ifdef LIBXML_WRITER_ENABLED
880 return(1);
881 #else
882 return(0);
883 #endif
884 case XML_WITH_SAX1:
885 #ifdef LIBXML_SAX1_ENABLED
886 return(1);
887 #else
888 return(0);
889 #endif
890 case XML_WITH_FTP:
891 #ifdef LIBXML_FTP_ENABLED
892 return(1);
893 #else
894 return(0);
895 #endif
896 case XML_WITH_HTTP:
897 #ifdef LIBXML_HTTP_ENABLED
898 return(1);
899 #else
900 return(0);
901 #endif
902 case XML_WITH_VALID:
903 #ifdef LIBXML_VALID_ENABLED
904 return(1);
905 #else
906 return(0);
907 #endif
908 case XML_WITH_HTML:
909 #ifdef LIBXML_HTML_ENABLED
910 return(1);
911 #else
912 return(0);
913 #endif
914 case XML_WITH_LEGACY:
915 #ifdef LIBXML_LEGACY_ENABLED
916 return(1);
917 #else
918 return(0);
919 #endif
920 case XML_WITH_C14N:
921 #ifdef LIBXML_C14N_ENABLED
922 return(1);
923 #else
924 return(0);
925 #endif
926 case XML_WITH_CATALOG:
927 #ifdef LIBXML_CATALOG_ENABLED
928 return(1);
929 #else
930 return(0);
931 #endif
932 case XML_WITH_XPATH:
933 #ifdef LIBXML_XPATH_ENABLED
934 return(1);
935 #else
936 return(0);
937 #endif
938 case XML_WITH_XPTR:
939 #ifdef LIBXML_XPTR_ENABLED
940 return(1);
941 #else
942 return(0);
943 #endif
944 case XML_WITH_XINCLUDE:
945 #ifdef LIBXML_XINCLUDE_ENABLED
946 return(1);
947 #else
948 return(0);
949 #endif
950 case XML_WITH_ICONV:
951 #ifdef LIBXML_ICONV_ENABLED
952 return(1);
953 #else
954 return(0);
955 #endif
956 case XML_WITH_ISO8859X:
957 #ifdef LIBXML_ISO8859X_ENABLED
958 return(1);
959 #else
960 return(0);
961 #endif
962 case XML_WITH_UNICODE:
963 #ifdef LIBXML_UNICODE_ENABLED
964 return(1);
965 #else
966 return(0);
967 #endif
968 case XML_WITH_REGEXP:
969 #ifdef LIBXML_REGEXP_ENABLED
970 return(1);
971 #else
972 return(0);
973 #endif
974 case XML_WITH_AUTOMATA:
975 #ifdef LIBXML_AUTOMATA_ENABLED
976 return(1);
977 #else
978 return(0);
979 #endif
980 case XML_WITH_EXPR:
981 #ifdef LIBXML_EXPR_ENABLED
982 return(1);
983 #else
984 return(0);
985 #endif
986 case XML_WITH_SCHEMAS:
987 #ifdef LIBXML_SCHEMAS_ENABLED
988 return(1);
989 #else
990 return(0);
991 #endif
992 case XML_WITH_SCHEMATRON:
993 #ifdef LIBXML_SCHEMATRON_ENABLED
994 return(1);
995 #else
996 return(0);
997 #endif
998 case XML_WITH_MODULES:
999 #ifdef LIBXML_MODULES_ENABLED
1000 return(1);
1001 #else
1002 return(0);
1003 #endif
1004 case XML_WITH_DEBUG:
1005 #ifdef LIBXML_DEBUG_ENABLED
1006 return(1);
1007 #else
1008 return(0);
1009 #endif
1010 case XML_WITH_DEBUG_MEM:
1011 #ifdef DEBUG_MEMORY_LOCATION
1012 return(1);
1013 #else
1014 return(0);
1015 #endif
1016 case XML_WITH_DEBUG_RUN:
1017 #ifdef LIBXML_DEBUG_RUNTIME
1018 return(1);
1019 #else
1020 return(0);
1021 #endif
1022 case XML_WITH_ZLIB:
1023 #ifdef LIBXML_ZLIB_ENABLED
1024 return(1);
1025 #else
1026 return(0);
1027 #endif
1028 case XML_WITH_LZMA:
1029 #ifdef LIBXML_LZMA_ENABLED
1030 return(1);
1031 #else
1032 return(0);
1033 #endif
1034 case XML_WITH_ICU:
1035 #ifdef LIBXML_ICU_ENABLED
1036 return(1);
1037 #else
1038 return(0);
1039 #endif
1040 default:
1041 break;
1042 }
1043 return(0);
1044 }
1045
1046 /************************************************************************
1047 * *
1048 * SAX2 defaulted attributes handling *
1049 * *
1050 ************************************************************************/
1051
1052 /**
1053 * xmlDetectSAX2:
1054 * @ctxt: an XML parser context
1055 *
1056 * Do the SAX2 detection and specific intialization
1057 */
1058 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1059 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1060 if (ctxt == NULL) return;
1061 #ifdef LIBXML_SAX1_ENABLED
1062 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1063 ((ctxt->sax->startElementNs != NULL) ||
1064 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1065 #else
1066 ctxt->sax2 = 1;
1067 #endif /* LIBXML_SAX1_ENABLED */
1068
1069 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1070 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1071 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1072 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1073 (ctxt->str_xml_ns == NULL)) {
1074 xmlErrMemory(ctxt, NULL);
1075 }
1076 }
1077
1078 typedef struct _xmlDefAttrs xmlDefAttrs;
1079 typedef xmlDefAttrs *xmlDefAttrsPtr;
1080 struct _xmlDefAttrs {
1081 int nbAttrs; /* number of defaulted attributes on that element */
1082 int maxAttrs; /* the size of the array */
1083 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1084 };
1085
1086 /**
1087 * xmlAttrNormalizeSpace:
1088 * @src: the source string
1089 * @dst: the target string
1090 *
1091 * Normalize the space in non CDATA attribute values:
1092 * If the attribute type is not CDATA, then the XML processor MUST further
1093 * process the normalized attribute value by discarding any leading and
1094 * trailing space (#x20) characters, and by replacing sequences of space
1095 * (#x20) characters by a single space (#x20) character.
1096 * Note that the size of dst need to be at least src, and if one doesn't need
1097 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1098 * passing src as dst is just fine.
1099 *
1100 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1101 * is needed.
1102 */
1103 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1104 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1105 {
1106 if ((src == NULL) || (dst == NULL))
1107 return(NULL);
1108
1109 while (*src == 0x20) src++;
1110 while (*src != 0) {
1111 if (*src == 0x20) {
1112 while (*src == 0x20) src++;
1113 if (*src != 0)
1114 *dst++ = 0x20;
1115 } else {
1116 *dst++ = *src++;
1117 }
1118 }
1119 *dst = 0;
1120 if (dst == src)
1121 return(NULL);
1122 return(dst);
1123 }
1124
1125 /**
1126 * xmlAttrNormalizeSpace2:
1127 * @src: the source string
1128 *
1129 * Normalize the space in non CDATA attribute values, a slightly more complex
1130 * front end to avoid allocation problems when running on attribute values
1131 * coming from the input.
1132 *
1133 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1134 * is needed.
1135 */
1136 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1137 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1138 {
1139 int i;
1140 int remove_head = 0;
1141 int need_realloc = 0;
1142 const xmlChar *cur;
1143
1144 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1145 return(NULL);
1146 i = *len;
1147 if (i <= 0)
1148 return(NULL);
1149
1150 cur = src;
1151 while (*cur == 0x20) {
1152 cur++;
1153 remove_head++;
1154 }
1155 while (*cur != 0) {
1156 if (*cur == 0x20) {
1157 cur++;
1158 if ((*cur == 0x20) || (*cur == 0)) {
1159 need_realloc = 1;
1160 break;
1161 }
1162 } else
1163 cur++;
1164 }
1165 if (need_realloc) {
1166 xmlChar *ret;
1167
1168 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1169 if (ret == NULL) {
1170 xmlErrMemory(ctxt, NULL);
1171 return(NULL);
1172 }
1173 xmlAttrNormalizeSpace(ret, ret);
1174 *len = (int) strlen((const char *)ret);
1175 return(ret);
1176 } else if (remove_head) {
1177 *len -= remove_head;
1178 memmove(src, src + remove_head, 1 + *len);
1179 return(src);
1180 }
1181 return(NULL);
1182 }
1183
1184 /**
1185 * xmlAddDefAttrs:
1186 * @ctxt: an XML parser context
1187 * @fullname: the element fullname
1188 * @fullattr: the attribute fullname
1189 * @value: the attribute value
1190 *
1191 * Add a defaulted attribute for an element
1192 */
1193 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1194 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1195 const xmlChar *fullname,
1196 const xmlChar *fullattr,
1197 const xmlChar *value) {
1198 xmlDefAttrsPtr defaults;
1199 int len;
1200 const xmlChar *name;
1201 const xmlChar *prefix;
1202
1203 /*
1204 * Allows to detect attribute redefinitions
1205 */
1206 if (ctxt->attsSpecial != NULL) {
1207 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1208 return;
1209 }
1210
1211 if (ctxt->attsDefault == NULL) {
1212 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1213 if (ctxt->attsDefault == NULL)
1214 goto mem_error;
1215 }
1216
1217 /*
1218 * split the element name into prefix:localname , the string found
1219 * are within the DTD and then not associated to namespace names.
1220 */
1221 name = xmlSplitQName3(fullname, &len);
1222 if (name == NULL) {
1223 name = xmlDictLookup(ctxt->dict, fullname, -1);
1224 prefix = NULL;
1225 } else {
1226 name = xmlDictLookup(ctxt->dict, name, -1);
1227 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1228 }
1229
1230 /*
1231 * make sure there is some storage
1232 */
1233 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1234 if (defaults == NULL) {
1235 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1236 (4 * 5) * sizeof(const xmlChar *));
1237 if (defaults == NULL)
1238 goto mem_error;
1239 defaults->nbAttrs = 0;
1240 defaults->maxAttrs = 4;
1241 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1242 defaults, NULL) < 0) {
1243 xmlFree(defaults);
1244 goto mem_error;
1245 }
1246 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1247 xmlDefAttrsPtr temp;
1248
1249 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1250 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1251 if (temp == NULL)
1252 goto mem_error;
1253 defaults = temp;
1254 defaults->maxAttrs *= 2;
1255 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1256 defaults, NULL) < 0) {
1257 xmlFree(defaults);
1258 goto mem_error;
1259 }
1260 }
1261
1262 /*
1263 * Split the element name into prefix:localname , the string found
1264 * are within the DTD and hen not associated to namespace names.
1265 */
1266 name = xmlSplitQName3(fullattr, &len);
1267 if (name == NULL) {
1268 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1269 prefix = NULL;
1270 } else {
1271 name = xmlDictLookup(ctxt->dict, name, -1);
1272 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1273 }
1274
1275 defaults->values[5 * defaults->nbAttrs] = name;
1276 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1277 /* intern the string and precompute the end */
1278 len = xmlStrlen(value);
1279 value = xmlDictLookup(ctxt->dict, value, len);
1280 defaults->values[5 * defaults->nbAttrs + 2] = value;
1281 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1282 if (ctxt->external)
1283 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1284 else
1285 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1286 defaults->nbAttrs++;
1287
1288 return;
1289
1290 mem_error:
1291 xmlErrMemory(ctxt, NULL);
1292 return;
1293 }
1294
1295 /**
1296 * xmlAddSpecialAttr:
1297 * @ctxt: an XML parser context
1298 * @fullname: the element fullname
1299 * @fullattr: the attribute fullname
1300 * @type: the attribute type
1301 *
1302 * Register this attribute type
1303 */
1304 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1305 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1306 const xmlChar *fullname,
1307 const xmlChar *fullattr,
1308 int type)
1309 {
1310 if (ctxt->attsSpecial == NULL) {
1311 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1312 if (ctxt->attsSpecial == NULL)
1313 goto mem_error;
1314 }
1315
1316 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1317 return;
1318
1319 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1320 (void *) (long) type);
1321 return;
1322
1323 mem_error:
1324 xmlErrMemory(ctxt, NULL);
1325 return;
1326 }
1327
1328 /**
1329 * xmlCleanSpecialAttrCallback:
1330 *
1331 * Removes CDATA attributes from the special attribute table
1332 */
1333 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1334 xmlCleanSpecialAttrCallback(void *payload, void *data,
1335 const xmlChar *fullname, const xmlChar *fullattr,
1336 const xmlChar *unused ATTRIBUTE_UNUSED) {
1337 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1338
1339 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1340 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1341 }
1342 }
1343
1344 /**
1345 * xmlCleanSpecialAttr:
1346 * @ctxt: an XML parser context
1347 *
1348 * Trim the list of attributes defined to remove all those of type
1349 * CDATA as they are not special. This call should be done when finishing
1350 * to parse the DTD and before starting to parse the document root.
1351 */
1352 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1353 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1354 {
1355 if (ctxt->attsSpecial == NULL)
1356 return;
1357
1358 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1359
1360 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1361 xmlHashFree(ctxt->attsSpecial, NULL);
1362 ctxt->attsSpecial = NULL;
1363 }
1364 return;
1365 }
1366
1367 /**
1368 * xmlCheckLanguageID:
1369 * @lang: pointer to the string value
1370 *
1371 * Checks that the value conforms to the LanguageID production:
1372 *
1373 * NOTE: this is somewhat deprecated, those productions were removed from
1374 * the XML Second edition.
1375 *
1376 * [33] LanguageID ::= Langcode ('-' Subcode)*
1377 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1378 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1379 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1380 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1381 * [38] Subcode ::= ([a-z] | [A-Z])+
1382 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1384 *
1385 * http://www.rfc-editor.org/rfc/rfc5646.txt
1386 * langtag = language
1387 * ["-" script]
1388 * ["-" region]
1389 * *("-" variant)
1390 * *("-" extension)
1391 * ["-" privateuse]
1392 * language = 2*3ALPHA ; shortest ISO 639 code
1393 * ["-" extlang] ; sometimes followed by
1394 * ; extended language subtags
1395 * / 4ALPHA ; or reserved for future use
1396 * / 5*8ALPHA ; or registered language subtag
1397 *
1398 * extlang = 3ALPHA ; selected ISO 639 codes
1399 * *2("-" 3ALPHA) ; permanently reserved
1400 *
1401 * script = 4ALPHA ; ISO 15924 code
1402 *
1403 * region = 2ALPHA ; ISO 3166-1 code
1404 * / 3DIGIT ; UN M.49 code
1405 *
1406 * variant = 5*8alphanum ; registered variants
1407 * / (DIGIT 3alphanum)
1408 *
1409 * extension = singleton 1*("-" (2*8alphanum))
1410 *
1411 * ; Single alphanumerics
1412 * ; "x" reserved for private use
1413 * singleton = DIGIT ; 0 - 9
1414 * / %x41-57 ; A - W
1415 * / %x59-5A ; Y - Z
1416 * / %x61-77 ; a - w
1417 * / %x79-7A ; y - z
1418 *
1419 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1420 * The parser below doesn't try to cope with extension or privateuse
1421 * that could be added but that's not interoperable anyway
1422 *
1423 * Returns 1 if correct 0 otherwise
1424 **/
1425 int
xmlCheckLanguageID(const xmlChar * lang)1426 xmlCheckLanguageID(const xmlChar * lang)
1427 {
1428 const xmlChar *cur = lang, *nxt;
1429
1430 if (cur == NULL)
1431 return (0);
1432 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1433 ((cur[0] == 'I') && (cur[1] == '-')) ||
1434 ((cur[0] == 'x') && (cur[1] == '-')) ||
1435 ((cur[0] == 'X') && (cur[1] == '-'))) {
1436 /*
1437 * Still allow IANA code and user code which were coming
1438 * from the previous version of the XML-1.0 specification
1439 * it's deprecated but we should not fail
1440 */
1441 cur += 2;
1442 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1443 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1444 cur++;
1445 return(cur[0] == 0);
1446 }
1447 nxt = cur;
1448 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1449 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1450 nxt++;
1451 if (nxt - cur >= 4) {
1452 /*
1453 * Reserved
1454 */
1455 if ((nxt - cur > 8) || (nxt[0] != 0))
1456 return(0);
1457 return(1);
1458 }
1459 if (nxt - cur < 2)
1460 return(0);
1461 /* we got an ISO 639 code */
1462 if (nxt[0] == 0)
1463 return(1);
1464 if (nxt[0] != '-')
1465 return(0);
1466
1467 nxt++;
1468 cur = nxt;
1469 /* now we can have extlang or script or region or variant */
1470 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1471 goto region_m49;
1472
1473 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1475 nxt++;
1476 if (nxt - cur == 4)
1477 goto script;
1478 if (nxt - cur == 2)
1479 goto region;
1480 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1481 goto variant;
1482 if (nxt - cur != 3)
1483 return(0);
1484 /* we parsed an extlang */
1485 if (nxt[0] == 0)
1486 return(1);
1487 if (nxt[0] != '-')
1488 return(0);
1489
1490 nxt++;
1491 cur = nxt;
1492 /* now we can have script or region or variant */
1493 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1494 goto region_m49;
1495
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 nxt++;
1499 if (nxt - cur == 2)
1500 goto region;
1501 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 goto variant;
1503 if (nxt - cur != 4)
1504 return(0);
1505 /* we parsed a script */
1506 script:
1507 if (nxt[0] == 0)
1508 return(1);
1509 if (nxt[0] != '-')
1510 return(0);
1511
1512 nxt++;
1513 cur = nxt;
1514 /* now we can have region or variant */
1515 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1516 goto region_m49;
1517
1518 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1519 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1520 nxt++;
1521
1522 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 goto variant;
1524 if (nxt - cur != 2)
1525 return(0);
1526 /* we parsed a region */
1527 region:
1528 if (nxt[0] == 0)
1529 return(1);
1530 if (nxt[0] != '-')
1531 return(0);
1532
1533 nxt++;
1534 cur = nxt;
1535 /* now we can just have a variant */
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538 nxt++;
1539
1540 if ((nxt - cur < 5) || (nxt - cur > 8))
1541 return(0);
1542
1543 /* we parsed a variant */
1544 variant:
1545 if (nxt[0] == 0)
1546 return(1);
1547 if (nxt[0] != '-')
1548 return(0);
1549 /* extensions and private use subtags not checked */
1550 return (1);
1551
1552 region_m49:
1553 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1554 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1555 nxt += 3;
1556 goto region;
1557 }
1558 return(0);
1559 }
1560
1561 /************************************************************************
1562 * *
1563 * Parser stacks related functions and macros *
1564 * *
1565 ************************************************************************/
1566
1567 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1568 const xmlChar ** str);
1569
1570 #ifdef SAX2
1571 /**
1572 * nsPush:
1573 * @ctxt: an XML parser context
1574 * @prefix: the namespace prefix or NULL
1575 * @URL: the namespace name
1576 *
1577 * Pushes a new parser namespace on top of the ns stack
1578 *
1579 * Returns -1 in case of error, -2 if the namespace should be discarded
1580 * and the index in the stack otherwise.
1581 */
1582 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1583 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1584 {
1585 if (ctxt->options & XML_PARSE_NSCLEAN) {
1586 int i;
1587 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1588 if (ctxt->nsTab[i] == prefix) {
1589 /* in scope */
1590 if (ctxt->nsTab[i + 1] == URL)
1591 return(-2);
1592 /* out of scope keep it */
1593 break;
1594 }
1595 }
1596 }
1597 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1598 ctxt->nsMax = 10;
1599 ctxt->nsNr = 0;
1600 ctxt->nsTab = (const xmlChar **)
1601 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1602 if (ctxt->nsTab == NULL) {
1603 xmlErrMemory(ctxt, NULL);
1604 ctxt->nsMax = 0;
1605 return (-1);
1606 }
1607 } else if (ctxt->nsNr >= ctxt->nsMax) {
1608 const xmlChar ** tmp;
1609 ctxt->nsMax *= 2;
1610 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1611 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1612 if (tmp == NULL) {
1613 xmlErrMemory(ctxt, NULL);
1614 ctxt->nsMax /= 2;
1615 return (-1);
1616 }
1617 ctxt->nsTab = tmp;
1618 }
1619 ctxt->nsTab[ctxt->nsNr++] = prefix;
1620 ctxt->nsTab[ctxt->nsNr++] = URL;
1621 return (ctxt->nsNr);
1622 }
1623 /**
1624 * nsPop:
1625 * @ctxt: an XML parser context
1626 * @nr: the number to pop
1627 *
1628 * Pops the top @nr parser prefix/namespace from the ns stack
1629 *
1630 * Returns the number of namespaces removed
1631 */
1632 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1633 nsPop(xmlParserCtxtPtr ctxt, int nr)
1634 {
1635 int i;
1636
1637 if (ctxt->nsTab == NULL) return(0);
1638 if (ctxt->nsNr < nr) {
1639 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1640 nr = ctxt->nsNr;
1641 }
1642 if (ctxt->nsNr <= 0)
1643 return (0);
1644
1645 for (i = 0;i < nr;i++) {
1646 ctxt->nsNr--;
1647 ctxt->nsTab[ctxt->nsNr] = NULL;
1648 }
1649 return(nr);
1650 }
1651 #endif
1652
1653 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1654 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1655 const xmlChar **atts;
1656 int *attallocs;
1657 int maxatts;
1658
1659 if (ctxt->atts == NULL) {
1660 maxatts = 55; /* allow for 10 attrs by default */
1661 atts = (const xmlChar **)
1662 xmlMalloc(maxatts * sizeof(xmlChar *));
1663 if (atts == NULL) goto mem_error;
1664 ctxt->atts = atts;
1665 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1666 if (attallocs == NULL) goto mem_error;
1667 ctxt->attallocs = attallocs;
1668 ctxt->maxatts = maxatts;
1669 } else if (nr + 5 > ctxt->maxatts) {
1670 maxatts = (nr + 5) * 2;
1671 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1672 maxatts * sizeof(const xmlChar *));
1673 if (atts == NULL) goto mem_error;
1674 ctxt->atts = atts;
1675 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1676 (maxatts / 5) * sizeof(int));
1677 if (attallocs == NULL) goto mem_error;
1678 ctxt->attallocs = attallocs;
1679 ctxt->maxatts = maxatts;
1680 }
1681 return(ctxt->maxatts);
1682 mem_error:
1683 xmlErrMemory(ctxt, NULL);
1684 return(-1);
1685 }
1686
1687 /**
1688 * inputPush:
1689 * @ctxt: an XML parser context
1690 * @value: the parser input
1691 *
1692 * Pushes a new parser input on top of the input stack
1693 *
1694 * Returns -1 in case of error, the index in the stack otherwise
1695 */
1696 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1697 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1698 {
1699 if ((ctxt == NULL) || (value == NULL))
1700 return(-1);
1701 if (ctxt->inputNr >= ctxt->inputMax) {
1702 ctxt->inputMax *= 2;
1703 ctxt->inputTab =
1704 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1705 ctxt->inputMax *
1706 sizeof(ctxt->inputTab[0]));
1707 if (ctxt->inputTab == NULL) {
1708 xmlErrMemory(ctxt, NULL);
1709 xmlFreeInputStream(value);
1710 ctxt->inputMax /= 2;
1711 value = NULL;
1712 return (-1);
1713 }
1714 }
1715 ctxt->inputTab[ctxt->inputNr] = value;
1716 ctxt->input = value;
1717 return (ctxt->inputNr++);
1718 }
1719 /**
1720 * inputPop:
1721 * @ctxt: an XML parser context
1722 *
1723 * Pops the top parser input from the input stack
1724 *
1725 * Returns the input just removed
1726 */
1727 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1728 inputPop(xmlParserCtxtPtr ctxt)
1729 {
1730 xmlParserInputPtr ret;
1731
1732 if (ctxt == NULL)
1733 return(NULL);
1734 if (ctxt->inputNr <= 0)
1735 return (NULL);
1736 ctxt->inputNr--;
1737 if (ctxt->inputNr > 0)
1738 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1739 else
1740 ctxt->input = NULL;
1741 ret = ctxt->inputTab[ctxt->inputNr];
1742 ctxt->inputTab[ctxt->inputNr] = NULL;
1743 return (ret);
1744 }
1745 /**
1746 * nodePush:
1747 * @ctxt: an XML parser context
1748 * @value: the element node
1749 *
1750 * Pushes a new element node on top of the node stack
1751 *
1752 * Returns -1 in case of error, the index in the stack otherwise
1753 */
1754 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1755 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1756 {
1757 if (ctxt == NULL) return(0);
1758 if (ctxt->nodeNr >= ctxt->nodeMax) {
1759 xmlNodePtr *tmp;
1760
1761 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1762 ctxt->nodeMax * 2 *
1763 sizeof(ctxt->nodeTab[0]));
1764 if (tmp == NULL) {
1765 xmlErrMemory(ctxt, NULL);
1766 return (-1);
1767 }
1768 ctxt->nodeTab = tmp;
1769 ctxt->nodeMax *= 2;
1770 }
1771 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1772 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1773 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1774 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1775 xmlParserMaxDepth);
1776 xmlHaltParser(ctxt);
1777 return(-1);
1778 }
1779 ctxt->nodeTab[ctxt->nodeNr] = value;
1780 ctxt->node = value;
1781 return (ctxt->nodeNr++);
1782 }
1783
1784 /**
1785 * nodePop:
1786 * @ctxt: an XML parser context
1787 *
1788 * Pops the top element node from the node stack
1789 *
1790 * Returns the node just removed
1791 */
1792 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1793 nodePop(xmlParserCtxtPtr ctxt)
1794 {
1795 xmlNodePtr ret;
1796
1797 if (ctxt == NULL) return(NULL);
1798 if (ctxt->nodeNr <= 0)
1799 return (NULL);
1800 ctxt->nodeNr--;
1801 if (ctxt->nodeNr > 0)
1802 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1803 else
1804 ctxt->node = NULL;
1805 ret = ctxt->nodeTab[ctxt->nodeNr];
1806 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1807 return (ret);
1808 }
1809
1810 #ifdef LIBXML_PUSH_ENABLED
1811 /**
1812 * nameNsPush:
1813 * @ctxt: an XML parser context
1814 * @value: the element name
1815 * @prefix: the element prefix
1816 * @URI: the element namespace name
1817 *
1818 * Pushes a new element name/prefix/URL on top of the name stack
1819 *
1820 * Returns -1 in case of error, the index in the stack otherwise
1821 */
1822 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1823 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1824 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1825 {
1826 if (ctxt->nameNr >= ctxt->nameMax) {
1827 const xmlChar * *tmp;
1828 void **tmp2;
1829 ctxt->nameMax *= 2;
1830 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1831 ctxt->nameMax *
1832 sizeof(ctxt->nameTab[0]));
1833 if (tmp == NULL) {
1834 ctxt->nameMax /= 2;
1835 goto mem_error;
1836 }
1837 ctxt->nameTab = tmp;
1838 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1839 ctxt->nameMax * 3 *
1840 sizeof(ctxt->pushTab[0]));
1841 if (tmp2 == NULL) {
1842 ctxt->nameMax /= 2;
1843 goto mem_error;
1844 }
1845 ctxt->pushTab = tmp2;
1846 }
1847 ctxt->nameTab[ctxt->nameNr] = value;
1848 ctxt->name = value;
1849 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1850 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1851 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1852 return (ctxt->nameNr++);
1853 mem_error:
1854 xmlErrMemory(ctxt, NULL);
1855 return (-1);
1856 }
1857 /**
1858 * nameNsPop:
1859 * @ctxt: an XML parser context
1860 *
1861 * Pops the top element/prefix/URI name from the name stack
1862 *
1863 * Returns the name just removed
1864 */
1865 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1866 nameNsPop(xmlParserCtxtPtr ctxt)
1867 {
1868 const xmlChar *ret;
1869
1870 if (ctxt->nameNr <= 0)
1871 return (NULL);
1872 ctxt->nameNr--;
1873 if (ctxt->nameNr > 0)
1874 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1875 else
1876 ctxt->name = NULL;
1877 ret = ctxt->nameTab[ctxt->nameNr];
1878 ctxt->nameTab[ctxt->nameNr] = NULL;
1879 return (ret);
1880 }
1881 #endif /* LIBXML_PUSH_ENABLED */
1882
1883 /**
1884 * namePush:
1885 * @ctxt: an XML parser context
1886 * @value: the element name
1887 *
1888 * Pushes a new element name on top of the name stack
1889 *
1890 * Returns -1 in case of error, the index in the stack otherwise
1891 */
1892 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1893 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1894 {
1895 if (ctxt == NULL) return (-1);
1896
1897 if (ctxt->nameNr >= ctxt->nameMax) {
1898 const xmlChar * *tmp;
1899 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1900 ctxt->nameMax * 2 *
1901 sizeof(ctxt->nameTab[0]));
1902 if (tmp == NULL) {
1903 goto mem_error;
1904 }
1905 ctxt->nameTab = tmp;
1906 ctxt->nameMax *= 2;
1907 }
1908 ctxt->nameTab[ctxt->nameNr] = value;
1909 ctxt->name = value;
1910 return (ctxt->nameNr++);
1911 mem_error:
1912 xmlErrMemory(ctxt, NULL);
1913 return (-1);
1914 }
1915 /**
1916 * namePop:
1917 * @ctxt: an XML parser context
1918 *
1919 * Pops the top element name from the name stack
1920 *
1921 * Returns the name just removed
1922 */
1923 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1924 namePop(xmlParserCtxtPtr ctxt)
1925 {
1926 const xmlChar *ret;
1927
1928 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1929 return (NULL);
1930 ctxt->nameNr--;
1931 if (ctxt->nameNr > 0)
1932 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1933 else
1934 ctxt->name = NULL;
1935 ret = ctxt->nameTab[ctxt->nameNr];
1936 ctxt->nameTab[ctxt->nameNr] = NULL;
1937 return (ret);
1938 }
1939
/*
 * spacePush:
 * Pushes @val on top of the space stack of @ctxt, doubling the stack
 * when it is full, and points ctxt->space to the new top.
 * Returns -1 in case of memory error, the index in the stack otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    top = &ctxt->spaceTab[ctxt->spaceNr];
    *top = val;
    ctxt->space = top;
    return(ctxt->spaceNr++);
}
1958
/*
 * spacePop:
 * Pops the top value from the space stack of @ctxt; the freed slot is
 * reset to -1 and ctxt->space points to the new top, or to the first
 * slot once the stack is empty.
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int val;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    val = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(val);
}
1971
1972 /*
1973 * Macros for accessing the content. Those should be used only by the parser,
1974 * and not exported.
1975 *
1976 * Dirty macros, i.e. one often need to make assumption on the context to
1977 * use them
1978 *
1979 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1980 * To be used with extreme caution since operations consuming
1981 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1986 * RAW same as CUR but in the input buffer, bypass any token
1987 * extraction that may have been done
1988 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
1989 * to compare on ASCII based substring.
1990 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1991 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
1994 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1995 *
1996 * NEXT Skip to the next character, this does the proper decoding
1997 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1998 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1999 * CUR_CHAR(l) returns the current unicode character (int), set l
2000 * to the number of xmlChars used for the encoding [0-5].
2001 * CUR_SCHAR same but operate on a string instead of the context
2002 * COPY_BUF copy the current unicode char to the target buffer, increment
2003 * the index
2004 * GROW, SHRINK handling of input buffers
2005 */
2006
/*
 * Accessors to the current position of the input of the parser context.
 * They all expect a variable named ctxt to be in scope.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the n first bytes found at s are
 * c1 ... cn. The bytes are compared as unsigned char and the comparison
 * stops at the first mismatch.
 * Every argument is parenthesized so that expressions like CMP4(p + 1, ...)
 * are cast and indexed as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2029
/*
 * SKIP(val): advance the input by val xmlChars, counting each of them as
 * one column, then handle a PE reference or an exhausted input found at
 * the new position.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but keeps the line and column in sync when
 * newlines are skipped. val is parenthesized in the loop test so that an
 * expression argument, e.g. SKIPL(a | b), is compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2052
2053 #define SHRINK if ((ctxt->progressive == 0) && \
2054 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2055 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2056 xmlSHRINK (ctxt);
2057
xmlSHRINK(xmlParserCtxtPtr ctxt)2058 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2059 xmlParserInputShrink(ctxt->input);
2060 if ((*ctxt->input->cur == 0) &&
2061 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2062 xmlPopInput(ctxt);
2063 }
2064
2065 #define GROW if ((ctxt->progressive == 0) && \
2066 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2067 xmlGROW (ctxt);
2068
xmlGROW(xmlParserCtxtPtr ctxt)2069 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2070 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2071 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2072
2073 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2074 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2075 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2076 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2077 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2078 xmlHaltParser(ctxt);
2079 return;
2080 }
2081 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2082 if ((ctxt->input->cur > ctxt->input->end) ||
2083 (ctxt->input->cur < ctxt->input->base)) {
2084 xmlHaltParser(ctxt);
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2086 return;
2087 }
2088 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2089 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2090 xmlPopInput(ctxt);
2091 }
2092
/*
 * Character level helpers, they all expect a variable named ctxt to be
 * in scope. Macro arguments are parenthesized in the expansions so that
 * expression arguments are used as a whole.
 */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* skip one xmlChar, counted as one column, growing an exhausted input */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* skip the current character made of l xmlChars, tracking line/column */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * append the character v, encoded on l xmlChars, to buffer b at index i
 * NOTE: this expands to a bare if/else statement, do not use it as the
 * body of an unbraced if.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2119
2120 /**
2121 * xmlSkipBlankChars:
2122 * @ctxt: the XML parser context
2123 *
2124 * skip all blanks character found at that point in the input streams.
2125 * It pops up finished entities in the process if allowable at that point.
2126 *
2127 * Returns the number of space chars skipped
2128 */
2129
2130 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2131 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2132 int res = 0;
2133
2134 /*
2135 * It's Okay to use CUR/NEXT here since all the blanks are on
2136 * the ASCII range.
2137 */
2138 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2139 const xmlChar *cur;
2140 /*
2141 * if we are in the document content, go really fast
2142 */
2143 cur = ctxt->input->cur;
2144 while (IS_BLANK_CH(*cur)) {
2145 if (*cur == '\n') {
2146 ctxt->input->line++; ctxt->input->col = 1;
2147 } else {
2148 ctxt->input->col++;
2149 }
2150 cur++;
2151 res++;
2152 if (*cur == 0) {
2153 ctxt->input->cur = cur;
2154 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2155 cur = ctxt->input->cur;
2156 }
2157 }
2158 ctxt->input->cur = cur;
2159 } else {
2160 int cur;
2161 do {
2162 cur = CUR;
2163 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2164 (ctxt->instate != XML_PARSER_EOF))) {
2165 NEXT;
2166 cur = CUR;
2167 res++;
2168 }
2169 while ((cur == 0) && (ctxt->inputNr > 1) &&
2170 (ctxt->instate != XML_PARSER_COMMENT)) {
2171 xmlPopInput(ctxt);
2172 cur = CUR;
2173 }
2174 /*
2175 * Need to handle support of entities branching here
2176 */
2177 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2178 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2179 (ctxt->instate != XML_PARSER_EOF));
2180 }
2181 return(res);
2182 }
2183
2184 /************************************************************************
2185 * *
2186 * Commodity functions to handle entities *
2187 * *
2188 ************************************************************************/
2189
2190 /**
2191 * xmlPopInput:
2192 * @ctxt: an XML parser context
2193 *
2194 * xmlPopInput: the current input pointed by ctxt->input came to an end
2195 * pop it and return the next char.
2196 *
2197 * Returns the current xmlChar in the parser context
2198 */
2199 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2200 xmlPopInput(xmlParserCtxtPtr ctxt) {
2201 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2202 if (xmlParserDebugEntities)
2203 xmlGenericError(xmlGenericErrorContext,
2204 "Popping input %d\n", ctxt->inputNr);
2205 xmlFreeInputStream(inputPop(ctxt));
2206 if ((*ctxt->input->cur == 0) &&
2207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2208 return(xmlPopInput(ctxt));
2209 return(CUR);
2210 }
2211
2212 /**
2213 * xmlPushInput:
2214 * @ctxt: an XML parser context
2215 * @input: an XML parser input fragment (entity, XML fragment ...).
2216 *
2217 * xmlPushInput: switch to a new input stream which is stacked on top
2218 * of the previous one(s).
2219 * Returns -1 in case of error or the index in the input stack
2220 */
2221 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2222 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2223 int ret;
2224 if (input == NULL) return(-1);
2225
2226 if (xmlParserDebugEntities) {
2227 if ((ctxt->input != NULL) && (ctxt->input->filename))
2228 xmlGenericError(xmlGenericErrorContext,
2229 "%s(%d): ", ctxt->input->filename,
2230 ctxt->input->line);
2231 xmlGenericError(xmlGenericErrorContext,
2232 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2233 }
2234 ret = inputPush(ctxt, input);
2235 if (ctxt->instate == XML_PARSER_EOF)
2236 return(-1);
2237 GROW;
2238 return(ret);
2239 }
2240
2241 /**
2242 * xmlParseCharRef:
2243 * @ctxt: an XML parser context
2244 *
2245 * parse Reference declarations
2246 *
2247 * [66] CharRef ::= '&#' [0-9]+ ';' |
2248 * '&#x' [0-9a-fA-F]+ ';'
2249 *
2250 * [ WFC: Legal Character ]
2251 * Characters referred to using character references must match the
2252 * production for Char.
2253 *
2254 * Returns the value parsed (as an int), 0 in case of error
2255 */
2256 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2257 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2258 unsigned int val = 0;
2259 int count = 0;
2260 unsigned int outofrange = 0;
2261
2262 /*
2263 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2264 */
2265 if ((RAW == '&') && (NXT(1) == '#') &&
2266 (NXT(2) == 'x')) {
2267 SKIP(3);
2268 GROW;
2269 while (RAW != ';') { /* loop blocked by count */
2270 if (count++ > 20) {
2271 count = 0;
2272 GROW;
2273 if (ctxt->instate == XML_PARSER_EOF)
2274 return(0);
2275 }
2276 if ((RAW >= '0') && (RAW <= '9'))
2277 val = val * 16 + (CUR - '0');
2278 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2279 val = val * 16 + (CUR - 'a') + 10;
2280 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2281 val = val * 16 + (CUR - 'A') + 10;
2282 else {
2283 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2284 val = 0;
2285 break;
2286 }
2287 if (val > 0x10FFFF)
2288 outofrange = val;
2289
2290 NEXT;
2291 count++;
2292 }
2293 if (RAW == ';') {
2294 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2295 ctxt->input->col++;
2296 ctxt->nbChars ++;
2297 ctxt->input->cur++;
2298 }
2299 } else if ((RAW == '&') && (NXT(1) == '#')) {
2300 SKIP(2);
2301 GROW;
2302 while (RAW != ';') { /* loop blocked by count */
2303 if (count++ > 20) {
2304 count = 0;
2305 GROW;
2306 if (ctxt->instate == XML_PARSER_EOF)
2307 return(0);
2308 }
2309 if ((RAW >= '0') && (RAW <= '9'))
2310 val = val * 10 + (CUR - '0');
2311 else {
2312 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2313 val = 0;
2314 break;
2315 }
2316 if (val > 0x10FFFF)
2317 outofrange = val;
2318
2319 NEXT;
2320 count++;
2321 }
2322 if (RAW == ';') {
2323 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2324 ctxt->input->col++;
2325 ctxt->nbChars ++;
2326 ctxt->input->cur++;
2327 }
2328 } else {
2329 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2330 }
2331
2332 /*
2333 * [ WFC: Legal Character ]
2334 * Characters referred to using character references must match the
2335 * production for Char.
2336 */
2337 if ((IS_CHAR(val) && (outofrange == 0))) {
2338 return(val);
2339 } else {
2340 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2341 "xmlParseCharRef: invalid xmlChar value %d\n",
2342 val);
2343 }
2344 return(0);
2345 }
2346
2347 /**
2348 * xmlParseStringCharRef:
2349 * @ctxt: an XML parser context
2350 * @str: a pointer to an index in the string
2351 *
2352 * parse Reference declarations, variant parsing from a string rather
2353 * than an an input flow.
2354 *
2355 * [66] CharRef ::= '&#' [0-9]+ ';' |
2356 * '&#x' [0-9a-fA-F]+ ';'
2357 *
2358 * [ WFC: Legal Character ]
2359 * Characters referred to using character references must match the
2360 * production for Char.
2361 *
2362 * Returns the value parsed (as an int), 0 in case of error, str will be
2363 * updated to the current value of the index
2364 */
2365 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2366 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2367 const xmlChar *ptr;
2368 xmlChar cur;
2369 unsigned int val = 0;
2370 unsigned int outofrange = 0;
2371
2372 if ((str == NULL) || (*str == NULL)) return(0);
2373 ptr = *str;
2374 cur = *ptr;
2375 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2376 ptr += 3;
2377 cur = *ptr;
2378 while (cur != ';') { /* Non input consuming loop */
2379 if ((cur >= '0') && (cur <= '9'))
2380 val = val * 16 + (cur - '0');
2381 else if ((cur >= 'a') && (cur <= 'f'))
2382 val = val * 16 + (cur - 'a') + 10;
2383 else if ((cur >= 'A') && (cur <= 'F'))
2384 val = val * 16 + (cur - 'A') + 10;
2385 else {
2386 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2387 val = 0;
2388 break;
2389 }
2390 if (val > 0x10FFFF)
2391 outofrange = val;
2392
2393 ptr++;
2394 cur = *ptr;
2395 }
2396 if (cur == ';')
2397 ptr++;
2398 } else if ((cur == '&') && (ptr[1] == '#')){
2399 ptr += 2;
2400 cur = *ptr;
2401 while (cur != ';') { /* Non input consuming loops */
2402 if ((cur >= '0') && (cur <= '9'))
2403 val = val * 10 + (cur - '0');
2404 else {
2405 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2406 val = 0;
2407 break;
2408 }
2409 if (val > 0x10FFFF)
2410 outofrange = val;
2411
2412 ptr++;
2413 cur = *ptr;
2414 }
2415 if (cur == ';')
2416 ptr++;
2417 } else {
2418 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2419 return(0);
2420 }
2421 *str = ptr;
2422
2423 /*
2424 * [ WFC: Legal Character ]
2425 * Characters referred to using character references must match the
2426 * production for Char.
2427 */
2428 if ((IS_CHAR(val) && (outofrange == 0))) {
2429 return(val);
2430 } else {
2431 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2432 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2433 val);
2434 }
2435 return(0);
2436 }
2437
2438 /**
2439 * xmlNewBlanksWrapperInputStream:
2440 * @ctxt: an XML parser context
2441 * @entity: an Entity pointer
2442 *
2443 * Create a new input stream for wrapping
2444 * blanks around a PEReference
2445 *
2446 * Returns the new input stream or NULL
2447 */
2448
/*
 * deallocblankswrapper:
 * @str:  the buffer to release
 *
 * Deallocation callback installed as input->free by
 * xmlNewBlanksWrapperInputStream(); it hands @str to xmlFree(). Its
 * address is also compared against input->free to recognize such inputs.
 */
static void
deallocblankswrapper(xmlChar *str) {
    xmlFree(str);
}
2450
2451 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2452 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2453 xmlParserInputPtr input;
2454 xmlChar *buffer;
2455 size_t length;
2456 if (entity == NULL) {
2457 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2458 "xmlNewBlanksWrapperInputStream entity\n");
2459 return(NULL);
2460 }
2461 if (xmlParserDebugEntities)
2462 xmlGenericError(xmlGenericErrorContext,
2463 "new blanks wrapper for entity: %s\n", entity->name);
2464 input = xmlNewInputStream(ctxt);
2465 if (input == NULL) {
2466 return(NULL);
2467 }
2468 length = xmlStrlen(entity->name) + 5;
2469 buffer = xmlMallocAtomic(length);
2470 if (buffer == NULL) {
2471 xmlErrMemory(ctxt, NULL);
2472 xmlFree(input);
2473 return(NULL);
2474 }
2475 buffer [0] = ' ';
2476 buffer [1] = '%';
2477 buffer [length-3] = ';';
2478 buffer [length-2] = ' ';
2479 buffer [length-1] = 0;
2480 memcpy(buffer + 2, entity->name, length - 5);
2481 input->free = deallocblankswrapper;
2482 input->base = buffer;
2483 input->cur = buffer;
2484 input->length = length;
2485 input->end = &buffer[length];
2486 return(input);
2487 }
2488
2489 /**
2490 * xmlParserHandlePEReference:
2491 * @ctxt: the parser context
2492 *
2493 * [69] PEReference ::= '%' Name ';'
2494 *
2495 * [ WFC: No Recursion ]
2496 * A parsed entity must not contain a recursive
2497 * reference to itself, either directly or indirectly.
2498 *
2499 * [ WFC: Entity Declared ]
2500 * In a document without any DTD, a document with only an internal DTD
2501 * subset which contains no parameter entity references, or a document
2502 * with "standalone='yes'", ... ... The declaration of a parameter
2503 * entity must precede any reference to it...
2504 *
2505 * [ VC: Entity Declared ]
2506 * In a document with an external subset or external parameter entities
2507 * with "standalone='no'", ... ... The declaration of a parameter entity
2508 * must precede any reference to it...
2509 *
2510 * [ WFC: In DTD ]
2511 * Parameter-entity references may only appear in the DTD.
2512 * NOTE: misleading but this is handled.
2513 *
2514 * A PEReference may have been detected in the current input stream
2515 * the handling is done accordingly to
2516 * http://www.w3.org/TR/REC-xml#entproc
2517 * i.e.
2518 * - Included in literal in entity values
2519 * - Included as Parameter Entity reference within DTDs
2520 */
2521 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2522 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2523 const xmlChar *name;
2524 xmlEntityPtr entity = NULL;
2525 xmlParserInputPtr input;
2526
2527 if (RAW != '%') return;
2528 switch(ctxt->instate) {
2529 case XML_PARSER_CDATA_SECTION:
2530 return;
2531 case XML_PARSER_COMMENT:
2532 return;
2533 case XML_PARSER_START_TAG:
2534 return;
2535 case XML_PARSER_END_TAG:
2536 return;
2537 case XML_PARSER_EOF:
2538 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2539 return;
2540 case XML_PARSER_PROLOG:
2541 case XML_PARSER_START:
2542 case XML_PARSER_MISC:
2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2544 return;
2545 case XML_PARSER_ENTITY_DECL:
2546 case XML_PARSER_CONTENT:
2547 case XML_PARSER_ATTRIBUTE_VALUE:
2548 case XML_PARSER_PI:
2549 case XML_PARSER_SYSTEM_LITERAL:
2550 case XML_PARSER_PUBLIC_LITERAL:
2551 /* we just ignore it there */
2552 return;
2553 case XML_PARSER_EPILOG:
2554 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2555 return;
2556 case XML_PARSER_ENTITY_VALUE:
2557 /*
2558 * NOTE: in the case of entity values, we don't do the
2559 * substitution here since we need the literal
2560 * entity value to be able to save the internal
2561 * subset of the document.
2562 * This will be handled by xmlStringDecodeEntities
2563 */
2564 return;
2565 case XML_PARSER_DTD:
2566 /*
2567 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2568 * In the internal DTD subset, parameter-entity references
2569 * can occur only where markup declarations can occur, not
2570 * within markup declarations.
2571 * In that case this is handled in xmlParseMarkupDecl
2572 */
2573 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2574 return;
2575 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2576 return;
2577 break;
2578 case XML_PARSER_IGNORE:
2579 return;
2580 }
2581
2582 NEXT;
2583 name = xmlParseName(ctxt);
2584 if (xmlParserDebugEntities)
2585 xmlGenericError(xmlGenericErrorContext,
2586 "PEReference: %s\n", name);
2587 if (name == NULL) {
2588 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2589 } else {
2590 if (RAW == ';') {
2591 NEXT;
2592 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2593 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2594 if (ctxt->instate == XML_PARSER_EOF)
2595 return;
2596 if (entity == NULL) {
2597
2598 /*
2599 * [ WFC: Entity Declared ]
2600 * In a document without any DTD, a document with only an
2601 * internal DTD subset which contains no parameter entity
2602 * references, or a document with "standalone='yes'", ...
2603 * ... The declaration of a parameter entity must precede
2604 * any reference to it...
2605 */
2606 if ((ctxt->standalone == 1) ||
2607 ((ctxt->hasExternalSubset == 0) &&
2608 (ctxt->hasPErefs == 0))) {
2609 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2610 "PEReference: %%%s; not found\n", name);
2611 } else {
2612 /*
2613 * [ VC: Entity Declared ]
2614 * In a document with an external subset or external
2615 * parameter entities with "standalone='no'", ...
2616 * ... The declaration of a parameter entity must precede
2617 * any reference to it...
2618 */
2619 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2620 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2621 "PEReference: %%%s; not found\n",
2622 name, NULL);
2623 } else
2624 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2625 "PEReference: %%%s; not found\n",
2626 name, NULL);
2627 ctxt->valid = 0;
2628 }
2629 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2630 } else if (ctxt->input->free != deallocblankswrapper) {
2631 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2632 if (xmlPushInput(ctxt, input) < 0)
2633 return;
2634 } else {
2635 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2636 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2637 xmlChar start[4];
2638 xmlCharEncoding enc;
2639
2640 /*
2641 * Note: external parameter entities will not be loaded, it
2642 * is not required for a non-validating parser, unless the
2643 * option of validating, or substituting entities were
2644 * given. Doing so is far more secure as the parser will
2645 * only process data coming from the document entity by
2646 * default.
2647 */
2648 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2649 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2650 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2651 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2652 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2653 (ctxt->replaceEntities == 0) &&
2654 (ctxt->validate == 0))
2655 return;
2656
2657 /*
2658 * handle the extra spaces added before and after
2659 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2660 * this is done independently.
2661 */
2662 input = xmlNewEntityInputStream(ctxt, entity);
2663 if (xmlPushInput(ctxt, input) < 0)
2664 return;
2665
2666 /*
2667 * Get the 4 first bytes and decode the charset
2668 * if enc != XML_CHAR_ENCODING_NONE
2669 * plug some encoding conversion routines.
2670 * Note that, since we may have some non-UTF8
2671 * encoding (like UTF16, bug 135229), the 'length'
2672 * is not known, but we can calculate based upon
2673 * the amount of data in the buffer.
2674 */
2675 GROW
2676 if (ctxt->instate == XML_PARSER_EOF)
2677 return;
2678 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2679 start[0] = RAW;
2680 start[1] = NXT(1);
2681 start[2] = NXT(2);
2682 start[3] = NXT(3);
2683 enc = xmlDetectCharEncoding(start, 4);
2684 if (enc != XML_CHAR_ENCODING_NONE) {
2685 xmlSwitchEncoding(ctxt, enc);
2686 }
2687 }
2688
2689 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2690 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2691 (IS_BLANK_CH(NXT(5)))) {
2692 xmlParseTextDecl(ctxt);
2693 }
2694 } else {
2695 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2696 "PEReference: %s is not a parameter entity\n",
2697 name);
2698 }
2699 }
2700 } else {
2701 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2702 }
2703 }
2704 }
2705
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the size computation wraps around or xmlRealloc() fails,
 * in which case buffer and buffer##_size are left untouched.
 * n is parenthesized so that an expression argument is added as a whole.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2720
2721 /**
2722 * xmlStringLenDecodeEntities:
2723 * @ctxt: the parser context
2724 * @str: the input string
2725 * @len: the string length
2726 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2727 * @end: an end marker xmlChar, 0 if none
2728 * @end2: an end marker xmlChar, 0 if none
2729 * @end3: an end marker xmlChar, 0 if none
2730 *
2731 * Takes a entity string content and process to do the adequate substitutions.
2732 *
2733 * [67] Reference ::= EntityRef | CharRef
2734 *
2735 * [69] PEReference ::= '%' Name ';'
2736 *
2737 * Returns A newly allocated string with the substitution done. The caller
2738 * must deallocate it !
2739 */
2740 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2741 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2742 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2743 xmlChar *buffer = NULL;
2744 size_t buffer_size = 0;
2745 size_t nbchars = 0;
2746
2747 xmlChar *current = NULL;
2748 xmlChar *rep = NULL;
2749 const xmlChar *last;
2750 xmlEntityPtr ent;
2751 int c,l;
2752
2753 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2754 return(NULL);
2755 last = str + len;
2756
2757 if (((ctxt->depth > 40) &&
2758 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2759 (ctxt->depth > 1024)) {
2760 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2761 return(NULL);
2762 }
2763
2764 /*
2765 * allocate a translation buffer.
2766 */
2767 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2768 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2769 if (buffer == NULL) goto mem_error;
2770
2771 /*
2772 * OK loop until we reach one of the ending char or a size limit.
2773 * we are operating on already parsed values.
2774 */
2775 if (str < last)
2776 c = CUR_SCHAR(str, l);
2777 else
2778 c = 0;
2779 while ((c != 0) && (c != end) && /* non input consuming loop */
2780 (c != end2) && (c != end3)) {
2781
2782 if (c == 0) break;
2783 if ((c == '&') && (str[1] == '#')) {
2784 int val = xmlParseStringCharRef(ctxt, &str);
2785 if (val != 0) {
2786 COPY_BUF(0,buffer,nbchars,val);
2787 }
2788 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2789 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790 }
2791 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2792 if (xmlParserDebugEntities)
2793 xmlGenericError(xmlGenericErrorContext,
2794 "String decoding Entity Reference: %.30s\n",
2795 str);
2796 ent = xmlParseStringEntityRef(ctxt, &str);
2797 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2798 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2799 goto int_error;
2800 xmlParserEntityCheck(ctxt, 0, ent, 0);
2801 if (ent != NULL)
2802 ctxt->nbentities += ent->checked / 2;
2803 if ((ent != NULL) &&
2804 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2805 if (ent->content != NULL) {
2806 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2807 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2808 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2809 }
2810 } else {
2811 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2812 "predefined entity has no content\n");
2813 }
2814 } else if ((ent != NULL) && (ent->content != NULL)) {
2815 ctxt->depth++;
2816 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2817 0, 0, 0);
2818 ctxt->depth--;
2819
2820 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2821 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2822 goto int_error;
2823
2824 if (rep != NULL) {
2825 current = rep;
2826 while (*current != 0) { /* non input consuming loop */
2827 buffer[nbchars++] = *current++;
2828 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2829 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2830 goto int_error;
2831 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2832 }
2833 }
2834 xmlFree(rep);
2835 rep = NULL;
2836 }
2837 } else if (ent != NULL) {
2838 int i = xmlStrlen(ent->name);
2839 const xmlChar *cur = ent->name;
2840
2841 buffer[nbchars++] = '&';
2842 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2843 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2844 }
2845 for (;i > 0;i--)
2846 buffer[nbchars++] = *cur++;
2847 buffer[nbchars++] = ';';
2848 }
2849 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2850 if (xmlParserDebugEntities)
2851 xmlGenericError(xmlGenericErrorContext,
2852 "String decoding PE Reference: %.30s\n", str);
2853 ent = xmlParseStringPEReference(ctxt, &str);
2854 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2855 goto int_error;
2856 xmlParserEntityCheck(ctxt, 0, ent, 0);
2857 if (ent != NULL)
2858 ctxt->nbentities += ent->checked / 2;
2859 if (ent != NULL) {
2860 if (ent->content == NULL) {
2861 xmlLoadEntityContent(ctxt, ent);
2862 }
2863 ctxt->depth++;
2864 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2865 0, 0, 0);
2866 ctxt->depth--;
2867 if (rep != NULL) {
2868 current = rep;
2869 while (*current != 0) { /* non input consuming loop */
2870 buffer[nbchars++] = *current++;
2871 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2872 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2873 goto int_error;
2874 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2875 }
2876 }
2877 xmlFree(rep);
2878 rep = NULL;
2879 }
2880 }
2881 } else {
2882 COPY_BUF(l,buffer,nbchars,c);
2883 str += l;
2884 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2885 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2886 }
2887 }
2888 if (str < last)
2889 c = CUR_SCHAR(str, l);
2890 else
2891 c = 0;
2892 }
2893 buffer[nbchars] = 0;
2894 return(buffer);
2895
2896 mem_error:
2897 xmlErrMemory(ctxt, NULL);
2898 int_error:
2899 if (rep != NULL)
2900 xmlFree(rep);
2901 if (buffer != NULL)
2902 xmlFree(buffer);
2903 return(NULL);
2904 }
2905
2906 /**
2907 * xmlStringDecodeEntities:
2908 * @ctxt: the parser context
2909 * @str: the input string
2910 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2911 * @end: an end marker xmlChar, 0 if none
2912 * @end2: an end marker xmlChar, 0 if none
2913 * @end3: an end marker xmlChar, 0 if none
2914 *
2915 * Takes a entity string content and process to do the adequate substitutions.
2916 *
2917 * [67] Reference ::= EntityRef | CharRef
2918 *
2919 * [69] PEReference ::= '%' Name ';'
2920 *
2921 * Returns A newly allocated string with the substitution done. The caller
2922 * must deallocate it !
2923 */
2924 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2925 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2926 xmlChar end, xmlChar end2, xmlChar end3) {
2927 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2928 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2929 end, end2, end3));
2930 }
2931
2932 /************************************************************************
2933 * *
2934 * Commodity functions, cleanup needed ? *
2935 * *
2936 ************************************************************************/
2937
2938 /**
2939 * areBlanks:
2940 * @ctxt: an XML parser context
2941 * @str: a xmlChar *
2942 * @len: the size of @str
2943 * @blank_chars: we know the chars are blanks
2944 *
2945 * Is this a sequence of blank chars that one can ignore ?
2946 *
2947 * Returns 1 if ignorable 0 otherwise.
2948 */
2949
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2950 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2951 int blank_chars) {
2952 int i, ret;
2953 xmlNodePtr lastChild;
2954
2955 /*
2956 * Don't spend time trying to differentiate them, the same callback is
2957 * used !
2958 */
2959 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2960 return(0);
2961
2962 /*
2963 * Check for xml:space value.
2964 */
2965 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2966 (*(ctxt->space) == -2))
2967 return(0);
2968
2969 /*
2970 * Check that the string is made of blanks
2971 */
2972 if (blank_chars == 0) {
2973 for (i = 0;i < len;i++)
2974 if (!(IS_BLANK_CH(str[i]))) return(0);
2975 }
2976
2977 /*
2978 * Look if the element is mixed content in the DTD if available
2979 */
2980 if (ctxt->node == NULL) return(0);
2981 if (ctxt->myDoc != NULL) {
2982 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2983 if (ret == 0) return(1);
2984 if (ret == 1) return(0);
2985 }
2986
2987 /*
2988 * Otherwise, heuristic :-\
2989 */
2990 if ((RAW != '<') && (RAW != 0xD)) return(0);
2991 if ((ctxt->node->children == NULL) &&
2992 (RAW == '<') && (NXT(1) == '/')) return(0);
2993
2994 lastChild = xmlGetLastChild(ctxt->node);
2995 if (lastChild == NULL) {
2996 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2997 (ctxt->node->content != NULL)) return(0);
2998 } else if (xmlNodeIsText(lastChild))
2999 return(0);
3000 else if ((ctxt->node->children != NULL) &&
3001 (xmlNodeIsText(ctxt->node->children)))
3002 return(0);
3003 return(1);
3004 }
3005
3006 /************************************************************************
3007 * *
3008 * Extra stuff for namespace support *
3009 * Relates to http://www.w3.org/TR/WD-xml-names *
3010 * *
3011 ************************************************************************/
3012
3013 /**
3014 * xmlSplitQName:
3015 * @ctxt: an XML parser context
3016 * @name: an XML parser context
3017 * @prefix: a xmlChar **
3018 *
3019 * parse an UTF8 encoded XML qualified name string
3020 *
3021 * [NS 5] QName ::= (Prefix ':')? LocalPart
3022 *
3023 * [NS 6] Prefix ::= NCName
3024 *
3025 * [NS 7] LocalPart ::= NCName
3026 *
3027 * Returns the local part, and prefix is updated
3028 * to get the Prefix if any.
3029 */
3030
3031 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)3032 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3033 xmlChar buf[XML_MAX_NAMELEN + 5];
3034 xmlChar *buffer = NULL;
3035 int len = 0;
3036 int max = XML_MAX_NAMELEN;
3037 xmlChar *ret = NULL;
3038 const xmlChar *cur = name;
3039 int c;
3040
3041 if (prefix == NULL) return(NULL);
3042 *prefix = NULL;
3043
3044 if (cur == NULL) return(NULL);
3045
3046 #ifndef XML_XML_NAMESPACE
3047 /* xml: prefix is not really a namespace */
3048 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3049 (cur[2] == 'l') && (cur[3] == ':'))
3050 return(xmlStrdup(name));
3051 #endif
3052
3053 /* nasty but well=formed */
3054 if (cur[0] == ':')
3055 return(xmlStrdup(name));
3056
3057 c = *cur++;
3058 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3059 buf[len++] = c;
3060 c = *cur++;
3061 }
3062 if (len >= max) {
3063 /*
3064 * Okay someone managed to make a huge name, so he's ready to pay
3065 * for the processing speed.
3066 */
3067 max = len * 2;
3068
3069 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3070 if (buffer == NULL) {
3071 xmlErrMemory(ctxt, NULL);
3072 return(NULL);
3073 }
3074 memcpy(buffer, buf, len);
3075 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3076 if (len + 10 > max) {
3077 xmlChar *tmp;
3078
3079 max *= 2;
3080 tmp = (xmlChar *) xmlRealloc(buffer,
3081 max * sizeof(xmlChar));
3082 if (tmp == NULL) {
3083 xmlFree(buffer);
3084 xmlErrMemory(ctxt, NULL);
3085 return(NULL);
3086 }
3087 buffer = tmp;
3088 }
3089 buffer[len++] = c;
3090 c = *cur++;
3091 }
3092 buffer[len] = 0;
3093 }
3094
3095 if ((c == ':') && (*cur == 0)) {
3096 if (buffer != NULL)
3097 xmlFree(buffer);
3098 *prefix = NULL;
3099 return(xmlStrdup(name));
3100 }
3101
3102 if (buffer == NULL)
3103 ret = xmlStrndup(buf, len);
3104 else {
3105 ret = buffer;
3106 buffer = NULL;
3107 max = XML_MAX_NAMELEN;
3108 }
3109
3110
3111 if (c == ':') {
3112 c = *cur;
3113 *prefix = ret;
3114 if (c == 0) {
3115 return(xmlStrndup(BAD_CAST "", 0));
3116 }
3117 len = 0;
3118
3119 /*
3120 * Check that the first character is proper to start
3121 * a new name
3122 */
3123 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3124 ((c >= 0x41) && (c <= 0x5A)) ||
3125 (c == '_') || (c == ':'))) {
3126 int l;
3127 int first = CUR_SCHAR(cur, l);
3128
3129 if (!IS_LETTER(first) && (first != '_')) {
3130 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3131 "Name %s is not XML Namespace compliant\n",
3132 name);
3133 }
3134 }
3135 cur++;
3136
3137 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3138 buf[len++] = c;
3139 c = *cur++;
3140 }
3141 if (len >= max) {
3142 /*
3143 * Okay someone managed to make a huge name, so he's ready to pay
3144 * for the processing speed.
3145 */
3146 max = len * 2;
3147
3148 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3149 if (buffer == NULL) {
3150 xmlErrMemory(ctxt, NULL);
3151 return(NULL);
3152 }
3153 memcpy(buffer, buf, len);
3154 while (c != 0) { /* tested bigname2.xml */
3155 if (len + 10 > max) {
3156 xmlChar *tmp;
3157
3158 max *= 2;
3159 tmp = (xmlChar *) xmlRealloc(buffer,
3160 max * sizeof(xmlChar));
3161 if (tmp == NULL) {
3162 xmlErrMemory(ctxt, NULL);
3163 xmlFree(buffer);
3164 return(NULL);
3165 }
3166 buffer = tmp;
3167 }
3168 buffer[len++] = c;
3169 c = *cur++;
3170 }
3171 buffer[len] = 0;
3172 }
3173
3174 if (buffer == NULL)
3175 ret = xmlStrndup(buf, len);
3176 else {
3177 ret = buffer;
3178 }
3179 }
3180
3181 return(ret);
3182 }
3183
3184 /************************************************************************
3185 * *
3186 * The parser itself *
3187 * Relates to http://www.w3.org/TR/REC-xml *
3188 * *
3189 ************************************************************************/
3190
3191 /************************************************************************
3192 * *
3193 * Routines to parse Name, NCName and NmToken *
3194 * *
3195 ************************************************************************/
#ifdef DEBUG
/*
 * Statistics counters, only compiled in DEBUG builds; presumably one per
 * name parsing routine (their users are not visible from here).
 */
static unsigned long nbParseName = 0,
                     nbParseNmToken = 0,
                     nbParseNCName = 0,
                     nbParseNCNameComplex = 0,
                     nbParseNameComplex = 0,
                     nbParseStringName = 0;
#endif
3204
3205 /*
3206 * The two following functions are related to the change of accepted
3207 * characters for Name and NmToken in the Revision 5 of XML-1.0
3208 * They correspond to the modified production [4] and the new production [4a]
3209 * changes in that revision. Also note that the macros used for the
3210 * productions Letter, Digit, CombiningChar and Extender are not needed
3211 * anymore.
3212 * We still keep compatibility to pre-revision5 parsing semantic if the
3213 * new XML_PARSE_OLD10 option is given to the parser.
3214 */
3215 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3216 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3217 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3218 /*
3219 * Use the new checks of production [4] [4a] amd [5] of the
3220 * Update 5 of XML-1.0
3221 */
3222 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3223 (((c >= 'a') && (c <= 'z')) ||
3224 ((c >= 'A') && (c <= 'Z')) ||
3225 (c == '_') || (c == ':') ||
3226 ((c >= 0xC0) && (c <= 0xD6)) ||
3227 ((c >= 0xD8) && (c <= 0xF6)) ||
3228 ((c >= 0xF8) && (c <= 0x2FF)) ||
3229 ((c >= 0x370) && (c <= 0x37D)) ||
3230 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3231 ((c >= 0x200C) && (c <= 0x200D)) ||
3232 ((c >= 0x2070) && (c <= 0x218F)) ||
3233 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3234 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3235 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3236 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3237 ((c >= 0x10000) && (c <= 0xEFFFF))))
3238 return(1);
3239 } else {
3240 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3241 return(1);
3242 }
3243 return(0);
3244 }
3245
3246 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3247 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3248 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3249 /*
3250 * Use the new checks of production [4] [4a] amd [5] of the
3251 * Update 5 of XML-1.0
3252 */
3253 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3254 (((c >= 'a') && (c <= 'z')) ||
3255 ((c >= 'A') && (c <= 'Z')) ||
3256 ((c >= '0') && (c <= '9')) || /* !start */
3257 (c == '_') || (c == ':') ||
3258 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3259 ((c >= 0xC0) && (c <= 0xD6)) ||
3260 ((c >= 0xD8) && (c <= 0xF6)) ||
3261 ((c >= 0xF8) && (c <= 0x2FF)) ||
3262 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3263 ((c >= 0x370) && (c <= 0x37D)) ||
3264 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3265 ((c >= 0x200C) && (c <= 0x200D)) ||
3266 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3267 ((c >= 0x2070) && (c <= 0x218F)) ||
3268 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3269 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3270 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3271 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3272 ((c >= 0x10000) && (c <= 0xEFFFF))))
3273 return(1);
3274 } else {
3275 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3276 (c == '.') || (c == '-') ||
3277 (c == '_') || (c == ':') ||
3278 (IS_COMBINING(c)) ||
3279 (IS_EXTENDER(c)))
3280 return(1);
3281 }
3282 return(0);
3283 }
3284
3285 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3286 int *len, int *alloc, int normalize);
3287
3288 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3289 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3290 int len = 0, l;
3291 int c;
3292 int count = 0;
3293
3294 #ifdef DEBUG
3295 nbParseNameComplex++;
3296 #endif
3297
3298 /*
3299 * Handler for more complex cases
3300 */
3301 GROW;
3302 if (ctxt->instate == XML_PARSER_EOF)
3303 return(NULL);
3304 c = CUR_CHAR(l);
3305 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3306 /*
3307 * Use the new checks of production [4] [4a] amd [5] of the
3308 * Update 5 of XML-1.0
3309 */
3310 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3311 (!(((c >= 'a') && (c <= 'z')) ||
3312 ((c >= 'A') && (c <= 'Z')) ||
3313 (c == '_') || (c == ':') ||
3314 ((c >= 0xC0) && (c <= 0xD6)) ||
3315 ((c >= 0xD8) && (c <= 0xF6)) ||
3316 ((c >= 0xF8) && (c <= 0x2FF)) ||
3317 ((c >= 0x370) && (c <= 0x37D)) ||
3318 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3319 ((c >= 0x200C) && (c <= 0x200D)) ||
3320 ((c >= 0x2070) && (c <= 0x218F)) ||
3321 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3322 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3323 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3324 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3325 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3326 return(NULL);
3327 }
3328 len += l;
3329 NEXTL(l);
3330 c = CUR_CHAR(l);
3331 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3332 (((c >= 'a') && (c <= 'z')) ||
3333 ((c >= 'A') && (c <= 'Z')) ||
3334 ((c >= '0') && (c <= '9')) || /* !start */
3335 (c == '_') || (c == ':') ||
3336 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3337 ((c >= 0xC0) && (c <= 0xD6)) ||
3338 ((c >= 0xD8) && (c <= 0xF6)) ||
3339 ((c >= 0xF8) && (c <= 0x2FF)) ||
3340 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3341 ((c >= 0x370) && (c <= 0x37D)) ||
3342 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3343 ((c >= 0x200C) && (c <= 0x200D)) ||
3344 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3345 ((c >= 0x2070) && (c <= 0x218F)) ||
3346 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3347 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3348 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3349 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3350 ((c >= 0x10000) && (c <= 0xEFFFF))
3351 )) {
3352 if (count++ > XML_PARSER_CHUNK_SIZE) {
3353 count = 0;
3354 GROW;
3355 if (ctxt->instate == XML_PARSER_EOF)
3356 return(NULL);
3357 }
3358 len += l;
3359 NEXTL(l);
3360 c = CUR_CHAR(l);
3361 }
3362 } else {
3363 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3364 (!IS_LETTER(c) && (c != '_') &&
3365 (c != ':'))) {
3366 return(NULL);
3367 }
3368 len += l;
3369 NEXTL(l);
3370 c = CUR_CHAR(l);
3371
3372 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3373 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3374 (c == '.') || (c == '-') ||
3375 (c == '_') || (c == ':') ||
3376 (IS_COMBINING(c)) ||
3377 (IS_EXTENDER(c)))) {
3378 if (count++ > XML_PARSER_CHUNK_SIZE) {
3379 count = 0;
3380 GROW;
3381 if (ctxt->instate == XML_PARSER_EOF)
3382 return(NULL);
3383 }
3384 len += l;
3385 NEXTL(l);
3386 c = CUR_CHAR(l);
3387 if (c == 0) {
3388 count = 0;
3389 GROW;
3390 if (ctxt->instate == XML_PARSER_EOF)
3391 return(NULL);
3392 c = CUR_CHAR(l);
3393 }
3394 }
3395 }
3396 if ((len > XML_MAX_NAME_LENGTH) &&
3397 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3398 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3399 return(NULL);
3400 }
3401 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3402 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3403 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3404 }
3405
3406 /**
3407 * xmlParseName:
3408 * @ctxt: an XML parser context
3409 *
3410 * parse an XML name.
3411 *
3412 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3413 * CombiningChar | Extender
3414 *
3415 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3416 *
3417 * [6] Names ::= Name (#x20 Name)*
3418 *
3419 * Returns the Name parsed or NULL
3420 */
3421
3422 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3423 xmlParseName(xmlParserCtxtPtr ctxt) {
3424 const xmlChar *in;
3425 const xmlChar *ret;
3426 int count = 0;
3427
3428 GROW;
3429
3430 #ifdef DEBUG
3431 nbParseName++;
3432 #endif
3433
3434 /*
3435 * Accelerator for simple ASCII names
3436 */
3437 in = ctxt->input->cur;
3438 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3439 ((*in >= 0x41) && (*in <= 0x5A)) ||
3440 (*in == '_') || (*in == ':')) {
3441 in++;
3442 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3443 ((*in >= 0x41) && (*in <= 0x5A)) ||
3444 ((*in >= 0x30) && (*in <= 0x39)) ||
3445 (*in == '_') || (*in == '-') ||
3446 (*in == ':') || (*in == '.'))
3447 in++;
3448 if ((*in > 0) && (*in < 0x80)) {
3449 count = in - ctxt->input->cur;
3450 if ((count > XML_MAX_NAME_LENGTH) &&
3451 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3452 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3453 return(NULL);
3454 }
3455 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3456 ctxt->input->cur = in;
3457 ctxt->nbChars += count;
3458 ctxt->input->col += count;
3459 if (ret == NULL)
3460 xmlErrMemory(ctxt, NULL);
3461 return(ret);
3462 }
3463 }
3464 /* accelerator for special cases */
3465 return(xmlParseNameComplex(ctxt));
3466 }
3467
3468 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3469 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3470 int len = 0, l;
3471 int c;
3472 int count = 0;
3473 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3474
3475 #ifdef DEBUG
3476 nbParseNCNameComplex++;
3477 #endif
3478
3479 /*
3480 * Handler for more complex cases
3481 */
3482 GROW;
3483 end = ctxt->input->cur;
3484 c = CUR_CHAR(l);
3485 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3486 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3487 return(NULL);
3488 }
3489
3490 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3491 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3492 if (count++ > XML_PARSER_CHUNK_SIZE) {
3493 if ((len > XML_MAX_NAME_LENGTH) &&
3494 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3495 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3496 return(NULL);
3497 }
3498 count = 0;
3499 GROW;
3500 if (ctxt->instate == XML_PARSER_EOF)
3501 return(NULL);
3502 }
3503 len += l;
3504 NEXTL(l);
3505 end = ctxt->input->cur;
3506 c = CUR_CHAR(l);
3507 if (c == 0) {
3508 count = 0;
3509 /*
3510 * when shrinking to extend the buffer we really need to preserve
3511 * the part of the name we already parsed. Hence rolling back
3512 * by current lenght.
3513 */
3514 ctxt->input->cur -= l;
3515 GROW;
3516 ctxt->input->cur += l;
3517 if (ctxt->instate == XML_PARSER_EOF)
3518 return(NULL);
3519 end = ctxt->input->cur;
3520 c = CUR_CHAR(l);
3521 }
3522 }
3523 if ((len > XML_MAX_NAME_LENGTH) &&
3524 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3525 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3526 return(NULL);
3527 }
3528 return(xmlDictLookup(ctxt->dict, end - len, len));
3529 }
3530
3531 /**
3532 * xmlParseNCName:
3533 * @ctxt: an XML parser context
3534 * @len: length of the string parsed
3535 *
3536 * parse an XML name.
3537 *
3538 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3539 * CombiningChar | Extender
3540 *
3541 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3542 *
3543 * Returns the Name parsed or NULL
3544 */
3545
3546 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3547 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3548 const xmlChar *in, *e;
3549 const xmlChar *ret;
3550 int count = 0;
3551
3552 #ifdef DEBUG
3553 nbParseNCName++;
3554 #endif
3555
3556 /*
3557 * Accelerator for simple ASCII names
3558 */
3559 in = ctxt->input->cur;
3560 e = ctxt->input->end;
3561 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3562 ((*in >= 0x41) && (*in <= 0x5A)) ||
3563 (*in == '_')) && (in < e)) {
3564 in++;
3565 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3566 ((*in >= 0x41) && (*in <= 0x5A)) ||
3567 ((*in >= 0x30) && (*in <= 0x39)) ||
3568 (*in == '_') || (*in == '-') ||
3569 (*in == '.')) && (in < e))
3570 in++;
3571 if (in >= e)
3572 goto complex;
3573 if ((*in > 0) && (*in < 0x80)) {
3574 count = in - ctxt->input->cur;
3575 if ((count > XML_MAX_NAME_LENGTH) &&
3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578 return(NULL);
3579 }
3580 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3581 ctxt->input->cur = in;
3582 ctxt->nbChars += count;
3583 ctxt->input->col += count;
3584 if (ret == NULL) {
3585 xmlErrMemory(ctxt, NULL);
3586 }
3587 return(ret);
3588 }
3589 }
3590 complex:
3591 return(xmlParseNCNameComplex(ctxt));
3592 }
3593
3594 /**
3595 * xmlParseNameAndCompare:
3596 * @ctxt: an XML parser context
3597 *
3598 * parse an XML name and compares for match
3599 * (specialized for endtag parsing)
3600 *
3601 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3602 * and the name for mismatch
3603 */
3604
3605 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3606 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3607 register const xmlChar *cmp = other;
3608 register const xmlChar *in;
3609 const xmlChar *ret;
3610
3611 GROW;
3612 if (ctxt->instate == XML_PARSER_EOF)
3613 return(NULL);
3614
3615 in = ctxt->input->cur;
3616 while (*in != 0 && *in == *cmp) {
3617 ++in;
3618 ++cmp;
3619 ctxt->input->col++;
3620 }
3621 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3622 /* success */
3623 ctxt->input->cur = in;
3624 return (const xmlChar*) 1;
3625 }
3626 /* failure (or end of input buffer), check with full function */
3627 ret = xmlParseName (ctxt);
3628 /* strings coming from the dictionnary direct compare possible */
3629 if (ret == other) {
3630 return (const xmlChar*) 1;
3631 }
3632 return ret;
3633 }
3634
3635 /**
3636 * xmlParseStringName:
3637 * @ctxt: an XML parser context
3638 * @str: a pointer to the string pointer (IN/OUT)
3639 *
3640 * parse an XML name.
3641 *
3642 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3643 * CombiningChar | Extender
3644 *
3645 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3646 *
3647 * [6] Names ::= Name (#x20 Name)*
3648 *
3649 * Returns the Name parsed or NULL. The @str pointer
3650 * is updated to the current location in the string.
3651 */
3652
3653 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3654 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3655 xmlChar buf[XML_MAX_NAMELEN + 5];
3656 const xmlChar *cur = *str;
3657 int len = 0, l;
3658 int c;
3659
3660 #ifdef DEBUG
3661 nbParseStringName++;
3662 #endif
3663
3664 c = CUR_SCHAR(cur, l);
3665 if (!xmlIsNameStartChar(ctxt, c)) {
3666 return(NULL);
3667 }
3668
3669 COPY_BUF(l,buf,len,c);
3670 cur += l;
3671 c = CUR_SCHAR(cur, l);
3672 while (xmlIsNameChar(ctxt, c)) {
3673 COPY_BUF(l,buf,len,c);
3674 cur += l;
3675 c = CUR_SCHAR(cur, l);
3676 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3677 /*
3678 * Okay someone managed to make a huge name, so he's ready to pay
3679 * for the processing speed.
3680 */
3681 xmlChar *buffer;
3682 int max = len * 2;
3683
3684 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3685 if (buffer == NULL) {
3686 xmlErrMemory(ctxt, NULL);
3687 return(NULL);
3688 }
3689 memcpy(buffer, buf, len);
3690 while (xmlIsNameChar(ctxt, c)) {
3691 if (len + 10 > max) {
3692 xmlChar *tmp;
3693
3694 if ((len > XML_MAX_NAME_LENGTH) &&
3695 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3696 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3697 xmlFree(buffer);
3698 return(NULL);
3699 }
3700 max *= 2;
3701 tmp = (xmlChar *) xmlRealloc(buffer,
3702 max * sizeof(xmlChar));
3703 if (tmp == NULL) {
3704 xmlErrMemory(ctxt, NULL);
3705 xmlFree(buffer);
3706 return(NULL);
3707 }
3708 buffer = tmp;
3709 }
3710 COPY_BUF(l,buffer,len,c);
3711 cur += l;
3712 c = CUR_SCHAR(cur, l);
3713 }
3714 buffer[len] = 0;
3715 *str = cur;
3716 return(buffer);
3717 }
3718 }
3719 if ((len > XML_MAX_NAME_LENGTH) &&
3720 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3721 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3722 return(NULL);
3723 }
3724 *str = cur;
3725 return(xmlStrndup(buf, len));
3726 }
3727
3728 /**
3729 * xmlParseNmtoken:
3730 * @ctxt: an XML parser context
3731 *
3732 * parse an XML Nmtoken.
3733 *
3734 * [7] Nmtoken ::= (NameChar)+
3735 *
3736 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3737 *
3738 * Returns the Nmtoken parsed or NULL
3739 */
3740
3741 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3742 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3743 xmlChar buf[XML_MAX_NAMELEN + 5];
3744 int len = 0, l;
3745 int c;
3746 int count = 0;
3747
3748 #ifdef DEBUG
3749 nbParseNmToken++;
3750 #endif
3751
3752 GROW;
3753 if (ctxt->instate == XML_PARSER_EOF)
3754 return(NULL);
3755 c = CUR_CHAR(l);
3756
3757 while (xmlIsNameChar(ctxt, c)) {
3758 if (count++ > XML_PARSER_CHUNK_SIZE) {
3759 count = 0;
3760 GROW;
3761 }
3762 COPY_BUF(l,buf,len,c);
3763 NEXTL(l);
3764 c = CUR_CHAR(l);
3765 if (c == 0) {
3766 count = 0;
3767 GROW;
3768 if (ctxt->instate == XML_PARSER_EOF)
3769 return(NULL);
3770 c = CUR_CHAR(l);
3771 }
3772 if (len >= XML_MAX_NAMELEN) {
3773 /*
3774 * Okay someone managed to make a huge token, so he's ready to pay
3775 * for the processing speed.
3776 */
3777 xmlChar *buffer;
3778 int max = len * 2;
3779
3780 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3781 if (buffer == NULL) {
3782 xmlErrMemory(ctxt, NULL);
3783 return(NULL);
3784 }
3785 memcpy(buffer, buf, len);
3786 while (xmlIsNameChar(ctxt, c)) {
3787 if (count++ > XML_PARSER_CHUNK_SIZE) {
3788 count = 0;
3789 GROW;
3790 if (ctxt->instate == XML_PARSER_EOF) {
3791 xmlFree(buffer);
3792 return(NULL);
3793 }
3794 }
3795 if (len + 10 > max) {
3796 xmlChar *tmp;
3797
3798 if ((max > XML_MAX_NAME_LENGTH) &&
3799 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3801 xmlFree(buffer);
3802 return(NULL);
3803 }
3804 max *= 2;
3805 tmp = (xmlChar *) xmlRealloc(buffer,
3806 max * sizeof(xmlChar));
3807 if (tmp == NULL) {
3808 xmlErrMemory(ctxt, NULL);
3809 xmlFree(buffer);
3810 return(NULL);
3811 }
3812 buffer = tmp;
3813 }
3814 COPY_BUF(l,buffer,len,c);
3815 NEXTL(l);
3816 c = CUR_CHAR(l);
3817 }
3818 buffer[len] = 0;
3819 return(buffer);
3820 }
3821 }
3822 if (len == 0)
3823 return(NULL);
3824 if ((len > XML_MAX_NAME_LENGTH) &&
3825 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3826 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3827 return(NULL);
3828 }
3829 return(xmlStrndup(buf, len));
3830 }
3831
3832 /**
3833 * xmlParseEntityValue:
3834 * @ctxt: an XML parser context
3835 * @orig: if non-NULL store a copy of the original entity value
3836 *
3837 * parse a value for ENTITY declarations
3838 *
3839 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3840 * "'" ([^%&'] | PEReference | Reference)* "'"
3841 *
3842 * Returns the EntityValue parsed with reference substituted or NULL
3843 */
3844
3845 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3846 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3847 xmlChar *buf = NULL;
3848 int len = 0;
3849 int size = XML_PARSER_BUFFER_SIZE;
3850 int c, l;
3851 xmlChar stop;
3852 xmlChar *ret = NULL;
3853 const xmlChar *cur = NULL;
3854 xmlParserInputPtr input;
3855
3856 if (RAW == '"') stop = '"';
3857 else if (RAW == '\'') stop = '\'';
3858 else {
3859 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3860 return(NULL);
3861 }
3862 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3863 if (buf == NULL) {
3864 xmlErrMemory(ctxt, NULL);
3865 return(NULL);
3866 }
3867
3868 /*
3869 * The content of the entity definition is copied in a buffer.
3870 */
3871
3872 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3873 input = ctxt->input;
3874 GROW;
3875 if (ctxt->instate == XML_PARSER_EOF) {
3876 xmlFree(buf);
3877 return(NULL);
3878 }
3879 NEXT;
3880 c = CUR_CHAR(l);
3881 /*
3882 * NOTE: 4.4.5 Included in Literal
3883 * When a parameter entity reference appears in a literal entity
3884 * value, ... a single or double quote character in the replacement
3885 * text is always treated as a normal data character and will not
3886 * terminate the literal.
3887 * In practice it means we stop the loop only when back at parsing
3888 * the initial entity and the quote is found
3889 */
3890 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3891 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3892 if (len + 5 >= size) {
3893 xmlChar *tmp;
3894
3895 size *= 2;
3896 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3897 if (tmp == NULL) {
3898 xmlErrMemory(ctxt, NULL);
3899 xmlFree(buf);
3900 return(NULL);
3901 }
3902 buf = tmp;
3903 }
3904 COPY_BUF(l,buf,len,c);
3905 NEXTL(l);
3906 /*
3907 * Pop-up of finished entities.
3908 */
3909 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3910 xmlPopInput(ctxt);
3911
3912 GROW;
3913 c = CUR_CHAR(l);
3914 if (c == 0) {
3915 GROW;
3916 c = CUR_CHAR(l);
3917 }
3918 }
3919 buf[len] = 0;
3920 if (ctxt->instate == XML_PARSER_EOF) {
3921 xmlFree(buf);
3922 return(NULL);
3923 }
3924
3925 /*
3926 * Raise problem w.r.t. '&' and '%' being used in non-entities
3927 * reference constructs. Note Charref will be handled in
3928 * xmlStringDecodeEntities()
3929 */
3930 cur = buf;
3931 while (*cur != 0) { /* non input consuming */
3932 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3933 xmlChar *name;
3934 xmlChar tmp = *cur;
3935
3936 cur++;
3937 name = xmlParseStringName(ctxt, &cur);
3938 if ((name == NULL) || (*cur != ';')) {
3939 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3940 "EntityValue: '%c' forbidden except for entities references\n",
3941 tmp);
3942 }
3943 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3944 (ctxt->inputNr == 1)) {
3945 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3946 }
3947 if (name != NULL)
3948 xmlFree(name);
3949 if (*cur == 0)
3950 break;
3951 }
3952 cur++;
3953 }
3954
3955 /*
3956 * Then PEReference entities are substituted.
3957 */
3958 if (c != stop) {
3959 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3960 xmlFree(buf);
3961 } else {
3962 NEXT;
3963 /*
3964 * NOTE: 4.4.7 Bypassed
3965 * When a general entity reference appears in the EntityValue in
3966 * an entity declaration, it is bypassed and left as is.
3967 * so XML_SUBSTITUTE_REF is not set here.
3968 */
3969 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3970 0, 0, 0);
3971 if (orig != NULL)
3972 *orig = buf;
3973 else
3974 xmlFree(buf);
3975 }
3976
3977 return(ret);
3978 }
3979
3980 /**
3981 * xmlParseAttValueComplex:
3982 * @ctxt: an XML parser context
3983 * @len: the resulting attribute len
3984 * @normalize: wether to apply the inner normalization
3985 *
3986 * parse a value for an attribute, this is the fallback function
3987 * of xmlParseAttValue() when the attribute parsing requires handling
3988 * of non-ASCII characters, or normalization compaction.
3989 *
3990 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3991 */
3992 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3993 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3994 xmlChar limit = 0;
3995 xmlChar *buf = NULL;
3996 xmlChar *rep = NULL;
3997 size_t len = 0;
3998 size_t buf_size = 0;
3999 int c, l, in_space = 0;
4000 xmlChar *current = NULL;
4001 xmlEntityPtr ent;
4002
4003 if (NXT(0) == '"') {
4004 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4005 limit = '"';
4006 NEXT;
4007 } else if (NXT(0) == '\'') {
4008 limit = '\'';
4009 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4010 NEXT;
4011 } else {
4012 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4013 return(NULL);
4014 }
4015
4016 /*
4017 * allocate a translation buffer.
4018 */
4019 buf_size = XML_PARSER_BUFFER_SIZE;
4020 buf = (xmlChar *) xmlMallocAtomic(buf_size);
4021 if (buf == NULL) goto mem_error;
4022
4023 /*
4024 * OK loop until we reach one of the ending char or a size limit.
4025 */
4026 c = CUR_CHAR(l);
4027 while (((NXT(0) != limit) && /* checked */
4028 (IS_CHAR(c)) && (c != '<')) &&
4029 (ctxt->instate != XML_PARSER_EOF)) {
4030 /*
4031 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4032 * special option is given
4033 */
4034 if ((len > XML_MAX_TEXT_LENGTH) &&
4035 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4036 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4037 "AttValue length too long\n");
4038 goto mem_error;
4039 }
4040 if (c == 0) break;
4041 if (c == '&') {
4042 in_space = 0;
4043 if (NXT(1) == '#') {
4044 int val = xmlParseCharRef(ctxt);
4045
4046 if (val == '&') {
4047 if (ctxt->replaceEntities) {
4048 if (len + 10 > buf_size) {
4049 growBuffer(buf, 10);
4050 }
4051 buf[len++] = '&';
4052 } else {
4053 /*
4054 * The reparsing will be done in xmlStringGetNodeList()
4055 * called by the attribute() function in SAX.c
4056 */
4057 if (len + 10 > buf_size) {
4058 growBuffer(buf, 10);
4059 }
4060 buf[len++] = '&';
4061 buf[len++] = '#';
4062 buf[len++] = '3';
4063 buf[len++] = '8';
4064 buf[len++] = ';';
4065 }
4066 } else if (val != 0) {
4067 if (len + 10 > buf_size) {
4068 growBuffer(buf, 10);
4069 }
4070 len += xmlCopyChar(0, &buf[len], val);
4071 }
4072 } else {
4073 ent = xmlParseEntityRef(ctxt);
4074 ctxt->nbentities++;
4075 if (ent != NULL)
4076 ctxt->nbentities += ent->owner;
4077 if ((ent != NULL) &&
4078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4079 if (len + 10 > buf_size) {
4080 growBuffer(buf, 10);
4081 }
4082 if ((ctxt->replaceEntities == 0) &&
4083 (ent->content[0] == '&')) {
4084 buf[len++] = '&';
4085 buf[len++] = '#';
4086 buf[len++] = '3';
4087 buf[len++] = '8';
4088 buf[len++] = ';';
4089 } else {
4090 buf[len++] = ent->content[0];
4091 }
4092 } else if ((ent != NULL) &&
4093 (ctxt->replaceEntities != 0)) {
4094 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4095 rep = xmlStringDecodeEntities(ctxt, ent->content,
4096 XML_SUBSTITUTE_REF,
4097 0, 0, 0);
4098 if (rep != NULL) {
4099 current = rep;
4100 while (*current != 0) { /* non input consuming */
4101 if ((*current == 0xD) || (*current == 0xA) ||
4102 (*current == 0x9)) {
4103 buf[len++] = 0x20;
4104 current++;
4105 } else
4106 buf[len++] = *current++;
4107 if (len + 10 > buf_size) {
4108 growBuffer(buf, 10);
4109 }
4110 }
4111 xmlFree(rep);
4112 rep = NULL;
4113 }
4114 } else {
4115 if (len + 10 > buf_size) {
4116 growBuffer(buf, 10);
4117 }
4118 if (ent->content != NULL)
4119 buf[len++] = ent->content[0];
4120 }
4121 } else if (ent != NULL) {
4122 int i = xmlStrlen(ent->name);
4123 const xmlChar *cur = ent->name;
4124
4125 /*
4126 * This may look absurd but is needed to detect
4127 * entities problems
4128 */
4129 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4130 (ent->content != NULL) && (ent->checked == 0)) {
4131 unsigned long oldnbent = ctxt->nbentities;
4132
4133 rep = xmlStringDecodeEntities(ctxt, ent->content,
4134 XML_SUBSTITUTE_REF, 0, 0, 0);
4135
4136 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4137 if (rep != NULL) {
4138 if (xmlStrchr(rep, '<'))
4139 ent->checked |= 1;
4140 xmlFree(rep);
4141 rep = NULL;
4142 }
4143 }
4144
4145 /*
4146 * Just output the reference
4147 */
4148 buf[len++] = '&';
4149 while (len + i + 10 > buf_size) {
4150 growBuffer(buf, i + 10);
4151 }
4152 for (;i > 0;i--)
4153 buf[len++] = *cur++;
4154 buf[len++] = ';';
4155 }
4156 }
4157 } else {
4158 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4159 if ((len != 0) || (!normalize)) {
4160 if ((!normalize) || (!in_space)) {
4161 COPY_BUF(l,buf,len,0x20);
4162 while (len + 10 > buf_size) {
4163 growBuffer(buf, 10);
4164 }
4165 }
4166 in_space = 1;
4167 }
4168 } else {
4169 in_space = 0;
4170 COPY_BUF(l,buf,len,c);
4171 if (len + 10 > buf_size) {
4172 growBuffer(buf, 10);
4173 }
4174 }
4175 NEXTL(l);
4176 }
4177 GROW;
4178 c = CUR_CHAR(l);
4179 }
4180 if (ctxt->instate == XML_PARSER_EOF)
4181 goto error;
4182
4183 if ((in_space) && (normalize)) {
4184 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4185 }
4186 buf[len] = 0;
4187 if (RAW == '<') {
4188 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4189 } else if (RAW != limit) {
4190 if ((c != 0) && (!IS_CHAR(c))) {
4191 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4192 "invalid character in attribute value\n");
4193 } else {
4194 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4195 "AttValue: ' expected\n");
4196 }
4197 } else
4198 NEXT;
4199
4200 /*
4201 * There we potentially risk an overflow, don't allow attribute value of
4202 * length more than INT_MAX it is a very reasonnable assumption !
4203 */
4204 if (len >= INT_MAX) {
4205 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4206 "AttValue length too long\n");
4207 goto mem_error;
4208 }
4209
4210 if (attlen != NULL) *attlen = (int) len;
4211 return(buf);
4212
4213 mem_error:
4214 xmlErrMemory(ctxt, NULL);
4215 error:
4216 if (buf != NULL)
4217 xmlFree(buf);
4218 if (rep != NULL)
4219 xmlFree(rep);
4220 return(NULL);
4221 }
4222
4223 /**
4224 * xmlParseAttValue:
4225 * @ctxt: an XML parser context
4226 *
4227 * parse a value for an attribute
4228 * Note: the parser won't do substitution of entities here, this
4229 * will be handled later in xmlStringGetNodeList
4230 *
4231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4232 * "'" ([^<&'] | Reference)* "'"
4233 *
4234 * 3.3.3 Attribute-Value Normalization:
4235 * Before the value of an attribute is passed to the application or
4236 * checked for validity, the XML processor must normalize it as follows:
4237 * - a character reference is processed by appending the referenced
4238 * character to the attribute value
4239 * - an entity reference is processed by recursively processing the
4240 * replacement text of the entity
4241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4242 * appending #x20 to the normalized value, except that only a single
4243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4244 * parsed entity or the literal entity value of an internal parsed entity
4245 * - other characters are processed by appending them to the normalized value
4246 * If the declared value is not CDATA, then the XML processor must further
4247 * process the normalized attribute value by discarding any leading and
4248 * trailing space (#x20) characters, and by replacing sequences of space
4249 * (#x20) characters by a single space (#x20) character.
4250 * All attributes for which no declaration has been read should be treated
4251 * by a non-validating parser as if declared CDATA.
4252 *
4253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4254 */
4255
4256
4257 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4258 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4261 }
4262
4263 /**
4264 * xmlParseSystemLiteral:
4265 * @ctxt: an XML parser context
4266 *
4267 * parse an XML Literal
4268 *
4269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4270 *
4271 * Returns the SystemLiteral parsed or NULL
4272 */
4273
4274 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4275 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4276 xmlChar *buf = NULL;
4277 int len = 0;
4278 int size = XML_PARSER_BUFFER_SIZE;
4279 int cur, l;
4280 xmlChar stop;
4281 int state = ctxt->instate;
4282 int count = 0;
4283
4284 SHRINK;
4285 if (RAW == '"') {
4286 NEXT;
4287 stop = '"';
4288 } else if (RAW == '\'') {
4289 NEXT;
4290 stop = '\'';
4291 } else {
4292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4293 return(NULL);
4294 }
4295
4296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4297 if (buf == NULL) {
4298 xmlErrMemory(ctxt, NULL);
4299 return(NULL);
4300 }
4301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4302 cur = CUR_CHAR(l);
4303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4304 if (len + 5 >= size) {
4305 xmlChar *tmp;
4306
4307 if ((size > XML_MAX_NAME_LENGTH) &&
4308 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4309 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4310 xmlFree(buf);
4311 ctxt->instate = (xmlParserInputState) state;
4312 return(NULL);
4313 }
4314 size *= 2;
4315 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4316 if (tmp == NULL) {
4317 xmlFree(buf);
4318 xmlErrMemory(ctxt, NULL);
4319 ctxt->instate = (xmlParserInputState) state;
4320 return(NULL);
4321 }
4322 buf = tmp;
4323 }
4324 count++;
4325 if (count > 50) {
4326 GROW;
4327 count = 0;
4328 if (ctxt->instate == XML_PARSER_EOF) {
4329 xmlFree(buf);
4330 return(NULL);
4331 }
4332 }
4333 COPY_BUF(l,buf,len,cur);
4334 NEXTL(l);
4335 cur = CUR_CHAR(l);
4336 if (cur == 0) {
4337 GROW;
4338 SHRINK;
4339 cur = CUR_CHAR(l);
4340 }
4341 }
4342 buf[len] = 0;
4343 ctxt->instate = (xmlParserInputState) state;
4344 if (!IS_CHAR(cur)) {
4345 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4346 } else {
4347 NEXT;
4348 }
4349 return(buf);
4350 }
4351
4352 /**
4353 * xmlParsePubidLiteral:
4354 * @ctxt: an XML parser context
4355 *
4356 * parse an XML public literal
4357 *
4358 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4359 *
4360 * Returns the PubidLiteral parsed or NULL.
4361 */
4362
4363 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4364 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4365 xmlChar *buf = NULL;
4366 int len = 0;
4367 int size = XML_PARSER_BUFFER_SIZE;
4368 xmlChar cur;
4369 xmlChar stop;
4370 int count = 0;
4371 xmlParserInputState oldstate = ctxt->instate;
4372
4373 SHRINK;
4374 if (RAW == '"') {
4375 NEXT;
4376 stop = '"';
4377 } else if (RAW == '\'') {
4378 NEXT;
4379 stop = '\'';
4380 } else {
4381 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4382 return(NULL);
4383 }
4384 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4385 if (buf == NULL) {
4386 xmlErrMemory(ctxt, NULL);
4387 return(NULL);
4388 }
4389 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4390 cur = CUR;
4391 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4392 if (len + 1 >= size) {
4393 xmlChar *tmp;
4394
4395 if ((size > XML_MAX_NAME_LENGTH) &&
4396 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4397 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4398 xmlFree(buf);
4399 return(NULL);
4400 }
4401 size *= 2;
4402 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4403 if (tmp == NULL) {
4404 xmlErrMemory(ctxt, NULL);
4405 xmlFree(buf);
4406 return(NULL);
4407 }
4408 buf = tmp;
4409 }
4410 buf[len++] = cur;
4411 count++;
4412 if (count > 50) {
4413 GROW;
4414 count = 0;
4415 if (ctxt->instate == XML_PARSER_EOF) {
4416 xmlFree(buf);
4417 return(NULL);
4418 }
4419 }
4420 NEXT;
4421 cur = CUR;
4422 if (cur == 0) {
4423 GROW;
4424 SHRINK;
4425 cur = CUR;
4426 }
4427 }
4428 buf[len] = 0;
4429 if (cur != stop) {
4430 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4431 } else {
4432 NEXT;
4433 }
4434 ctxt->instate = oldstate;
4435 return(buf);
4436 }
4437
/*
 * Forward declaration: xmlParseCharData() hands over to this routine,
 * which is defined after it in this file.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4439
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by one input byte. A non-zero entry (the byte's own
 * value) means the byte can be skipped over by the scanning loop of
 * xmlParseCharData(); a zero entry stops that loop. Zero entries are:
 *   - all control bytes below 0x20 except TAB (0x09), so LF (0x0A) and
 *     CR (0x0D) stop the scan,
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - every byte from 0x80 upwards (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4477
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and passes
 * pointers into that buffer straight to the SAX callbacks. Whenever the
 * fast path meets a byte it does not handle, or when @cdata is non-zero,
 * the work is handed to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Line/column matching ctxt->input->cur. They are written back before
     * falling through to the slow path, because the scan below advances
     * ctxt->input->line/col ahead of ctxt->input->cur.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a run of spaces and line feeds, tracking the position. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * Only spaces/line feeds were seen before the next markup:
             * deliver them (if any) and stop.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * areBlanks() is only consulted when the two callbacks
                     * differ; otherwise everything goes to characters().
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /* NOTE(review): meaning of -1/-2 in *ctxt->space
                             * is not visible here - confirm. */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* Skip every byte that test_char_data[] marks as plain text. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside a CDATA section. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver the bytes between ctxt->input->cur and in. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: drop the CR by restarting the pending text at
                     * the LF, then go on scanning after it.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* The buffer may have moved: reload the scan pointer. */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        /* Next byte is not handled by the fast path. */
        nbchar = 0;
    }
    /* Give the slow path the position matching ctxt->input->cur. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4635
4636 /**
4637 * xmlParseCharDataComplex:
4638 * @ctxt: an XML parser context
4639 * @cdata: int indicating whether we are within a CDATA section
4640 *
4641 * parse a CharData section.this is the fallback function
4642 * of xmlParseCharData() when the parsing requires handling
4643 * of non-ASCII characters.
4644 */
4645 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4646 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4647 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4648 int nbchar = 0;
4649 int cur, l;
4650 int count = 0;
4651
4652 SHRINK;
4653 GROW;
4654 cur = CUR_CHAR(l);
4655 while ((cur != '<') && /* checked */
4656 (cur != '&') &&
4657 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4658 if ((cur == ']') && (NXT(1) == ']') &&
4659 (NXT(2) == '>')) {
4660 if (cdata) break;
4661 else {
4662 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4663 }
4664 }
4665 COPY_BUF(l,buf,nbchar,cur);
4666 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4667 buf[nbchar] = 0;
4668
4669 /*
4670 * OK the segment is to be consumed as chars.
4671 */
4672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4673 if (areBlanks(ctxt, buf, nbchar, 0)) {
4674 if (ctxt->sax->ignorableWhitespace != NULL)
4675 ctxt->sax->ignorableWhitespace(ctxt->userData,
4676 buf, nbchar);
4677 } else {
4678 if (ctxt->sax->characters != NULL)
4679 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4680 if ((ctxt->sax->characters !=
4681 ctxt->sax->ignorableWhitespace) &&
4682 (*ctxt->space == -1))
4683 *ctxt->space = -2;
4684 }
4685 }
4686 nbchar = 0;
4687 /* something really bad happened in the SAX callback */
4688 if (ctxt->instate != XML_PARSER_CONTENT)
4689 return;
4690 }
4691 count++;
4692 if (count > 50) {
4693 GROW;
4694 count = 0;
4695 if (ctxt->instate == XML_PARSER_EOF)
4696 return;
4697 }
4698 NEXTL(l);
4699 cur = CUR_CHAR(l);
4700 }
4701 if (nbchar != 0) {
4702 buf[nbchar] = 0;
4703 /*
4704 * OK the segment is to be consumed as chars.
4705 */
4706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4707 if (areBlanks(ctxt, buf, nbchar, 0)) {
4708 if (ctxt->sax->ignorableWhitespace != NULL)
4709 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4710 } else {
4711 if (ctxt->sax->characters != NULL)
4712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4713 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4714 (*ctxt->space == -1))
4715 *ctxt->space = -2;
4716 }
4717 }
4718 }
4719 if ((cur != 0) && (!IS_CHAR(cur))) {
4720 /* Generate the error and skip the offending character */
4721 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4722 "PCDATA invalid Char value %d\n",
4723 cur);
4724 NEXTL(l);
4725 }
4726 }
4727
4728 /**
4729 * xmlParseExternalID:
4730 * @ctxt: an XML parser context
4731 * @publicID: a xmlChar** receiving PubidLiteral
4732 * @strict: indicate whether we should restrict parsing to only
4733 * production [75], see NOTE below
4734 *
4735 * Parse an External ID or a Public ID
4736 *
4737 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4738 * 'PUBLIC' S PubidLiteral S SystemLiteral
4739 *
4740 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4742 *
4743 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4744 *
4745 * Returns the function returns SystemLiteral and in the second
4746 * case publicID receives PubidLiteral, is strict is off
4747 * it is possible to return NULL and have publicID set.
4748 */
4749
4750 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4751 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4752 xmlChar *URI = NULL;
4753
4754 SHRINK;
4755
4756 *publicID = NULL;
4757 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4758 SKIP(6);
4759 if (!IS_BLANK_CH(CUR)) {
4760 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4761 "Space required after 'SYSTEM'\n");
4762 }
4763 SKIP_BLANKS;
4764 URI = xmlParseSystemLiteral(ctxt);
4765 if (URI == NULL) {
4766 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4767 }
4768 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4769 SKIP(6);
4770 if (!IS_BLANK_CH(CUR)) {
4771 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4772 "Space required after 'PUBLIC'\n");
4773 }
4774 SKIP_BLANKS;
4775 *publicID = xmlParsePubidLiteral(ctxt);
4776 if (*publicID == NULL) {
4777 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4778 }
4779 if (strict) {
4780 /*
4781 * We don't handle [83] so "S SystemLiteral" is required.
4782 */
4783 if (!IS_BLANK_CH(CUR)) {
4784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785 "Space required after the Public Identifier\n");
4786 }
4787 } else {
4788 /*
4789 * We handle [83] so we return immediately, if
4790 * "S SystemLiteral" is not detected. From a purely parsing
4791 * point of view that's a nice mess.
4792 */
4793 const xmlChar *ptr;
4794 GROW;
4795
4796 ptr = CUR_PTR;
4797 if (!IS_BLANK_CH(*ptr)) return(NULL);
4798
4799 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4800 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4801 }
4802 SKIP_BLANKS;
4803 URI = xmlParseSystemLiteral(ctxt);
4804 if (URI == NULL) {
4805 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4806 }
4807 }
4808 return(URI);
4809 }
4810
4811 /**
4812 * xmlParseCommentComplex:
4813 * @ctxt: an XML parser context
4814 * @buf: the already parsed part of the buffer
4815 * @len: number of bytes filles in the buffer
4816 * @size: allocated size of the buffer
4817 *
4818 * Skip an XML (SGML) comment <!-- .... -->
4819 * The spec says that "For compatibility, the string "--" (double-hyphen)
4820 * must not occur within comments. "
4821 * This is the slow routine in case the accelerator for ascii didn't work
4822 *
4823 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4824 */
4825 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4826 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4827 size_t len, size_t size) {
4828 int q, ql;
4829 int r, rl;
4830 int cur, l;
4831 size_t count = 0;
4832 int inputid;
4833
4834 inputid = ctxt->input->id;
4835
4836 if (buf == NULL) {
4837 len = 0;
4838 size = XML_PARSER_BUFFER_SIZE;
4839 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4840 if (buf == NULL) {
4841 xmlErrMemory(ctxt, NULL);
4842 return;
4843 }
4844 }
4845 GROW; /* Assure there's enough input data */
4846 q = CUR_CHAR(ql);
4847 if (q == 0)
4848 goto not_terminated;
4849 if (!IS_CHAR(q)) {
4850 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4851 "xmlParseComment: invalid xmlChar value %d\n",
4852 q);
4853 xmlFree (buf);
4854 return;
4855 }
4856 NEXTL(ql);
4857 r = CUR_CHAR(rl);
4858 if (r == 0)
4859 goto not_terminated;
4860 if (!IS_CHAR(r)) {
4861 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4862 "xmlParseComment: invalid xmlChar value %d\n",
4863 q);
4864 xmlFree (buf);
4865 return;
4866 }
4867 NEXTL(rl);
4868 cur = CUR_CHAR(l);
4869 if (cur == 0)
4870 goto not_terminated;
4871 while (IS_CHAR(cur) && /* checked */
4872 ((cur != '>') ||
4873 (r != '-') || (q != '-'))) {
4874 if ((r == '-') && (q == '-')) {
4875 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4876 }
4877 if ((len > XML_MAX_TEXT_LENGTH) &&
4878 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4879 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4880 "Comment too big found", NULL);
4881 xmlFree (buf);
4882 return;
4883 }
4884 if (len + 5 >= size) {
4885 xmlChar *new_buf;
4886 size_t new_size;
4887
4888 new_size = size * 2;
4889 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4890 if (new_buf == NULL) {
4891 xmlFree (buf);
4892 xmlErrMemory(ctxt, NULL);
4893 return;
4894 }
4895 buf = new_buf;
4896 size = new_size;
4897 }
4898 COPY_BUF(ql,buf,len,q);
4899 q = r;
4900 ql = rl;
4901 r = cur;
4902 rl = l;
4903
4904 count++;
4905 if (count > 50) {
4906 GROW;
4907 count = 0;
4908 if (ctxt->instate == XML_PARSER_EOF) {
4909 xmlFree(buf);
4910 return;
4911 }
4912 }
4913 NEXTL(l);
4914 cur = CUR_CHAR(l);
4915 if (cur == 0) {
4916 SHRINK;
4917 GROW;
4918 cur = CUR_CHAR(l);
4919 }
4920 }
4921 buf[len] = 0;
4922 if (cur == 0) {
4923 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4924 "Comment not terminated \n<!--%.50s\n", buf);
4925 } else if (!IS_CHAR(cur)) {
4926 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4927 "xmlParseComment: invalid xmlChar value %d\n",
4928 cur);
4929 } else {
4930 if (inputid != ctxt->input->id) {
4931 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4932 "Comment doesn't start and stop in the same entity\n");
4933 }
4934 NEXT;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4936 (!ctxt->disableSAX))
4937 ctxt->sax->comment(ctxt->userData, buf);
4938 }
4939 xmlFree(buf);
4940 return;
4941 not_terminated:
4942 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4943 "Comment not terminated\n", NULL);
4944 xmlFree(buf);
4945 return;
4946 }
4947
4948 /**
4949 * xmlParseComment:
4950 * @ctxt: an XML parser context
4951 *
4952 * Skip an XML (SGML) comment <!-- .... -->
4953 * The spec says that "For compatibility, the string "--" (double-hyphen)
4954 * must not occur within comments. "
4955 *
4956 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4957 */
4958 void
xmlParseComment(xmlParserCtxtPtr ctxt)4959 xmlParseComment(xmlParserCtxtPtr ctxt) {
4960 xmlChar *buf = NULL;
4961 size_t size = XML_PARSER_BUFFER_SIZE;
4962 size_t len = 0;
4963 xmlParserInputState state;
4964 const xmlChar *in;
4965 size_t nbchar = 0;
4966 int ccol;
4967 int inputid;
4968
4969 /*
4970 * Check that there is a comment right here.
4971 */
4972 if ((RAW != '<') || (NXT(1) != '!') ||
4973 (NXT(2) != '-') || (NXT(3) != '-')) return;
4974 state = ctxt->instate;
4975 ctxt->instate = XML_PARSER_COMMENT;
4976 inputid = ctxt->input->id;
4977 SKIP(4);
4978 SHRINK;
4979 GROW;
4980
4981 /*
4982 * Accelerated common case where input don't need to be
4983 * modified before passing it to the handler.
4984 */
4985 in = ctxt->input->cur;
4986 do {
4987 if (*in == 0xA) {
4988 do {
4989 ctxt->input->line++; ctxt->input->col = 1;
4990 in++;
4991 } while (*in == 0xA);
4992 }
4993 get_more:
4994 ccol = ctxt->input->col;
4995 while (((*in > '-') && (*in <= 0x7F)) ||
4996 ((*in >= 0x20) && (*in < '-')) ||
4997 (*in == 0x09)) {
4998 in++;
4999 ccol++;
5000 }
5001 ctxt->input->col = ccol;
5002 if (*in == 0xA) {
5003 do {
5004 ctxt->input->line++; ctxt->input->col = 1;
5005 in++;
5006 } while (*in == 0xA);
5007 goto get_more;
5008 }
5009 nbchar = in - ctxt->input->cur;
5010 /*
5011 * save current set of data
5012 */
5013 if (nbchar > 0) {
5014 if ((ctxt->sax != NULL) &&
5015 (ctxt->sax->comment != NULL)) {
5016 if (buf == NULL) {
5017 if ((*in == '-') && (in[1] == '-'))
5018 size = nbchar + 1;
5019 else
5020 size = XML_PARSER_BUFFER_SIZE + nbchar;
5021 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5022 if (buf == NULL) {
5023 xmlErrMemory(ctxt, NULL);
5024 ctxt->instate = state;
5025 return;
5026 }
5027 len = 0;
5028 } else if (len + nbchar + 1 >= size) {
5029 xmlChar *new_buf;
5030 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5031 new_buf = (xmlChar *) xmlRealloc(buf,
5032 size * sizeof(xmlChar));
5033 if (new_buf == NULL) {
5034 xmlFree (buf);
5035 xmlErrMemory(ctxt, NULL);
5036 ctxt->instate = state;
5037 return;
5038 }
5039 buf = new_buf;
5040 }
5041 memcpy(&buf[len], ctxt->input->cur, nbchar);
5042 len += nbchar;
5043 buf[len] = 0;
5044 }
5045 }
5046 if ((len > XML_MAX_TEXT_LENGTH) &&
5047 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5048 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5049 "Comment too big found", NULL);
5050 xmlFree (buf);
5051 return;
5052 }
5053 ctxt->input->cur = in;
5054 if (*in == 0xA) {
5055 in++;
5056 ctxt->input->line++; ctxt->input->col = 1;
5057 }
5058 if (*in == 0xD) {
5059 in++;
5060 if (*in == 0xA) {
5061 ctxt->input->cur = in;
5062 in++;
5063 ctxt->input->line++; ctxt->input->col = 1;
5064 continue; /* while */
5065 }
5066 in--;
5067 }
5068 SHRINK;
5069 GROW;
5070 if (ctxt->instate == XML_PARSER_EOF) {
5071 xmlFree(buf);
5072 return;
5073 }
5074 in = ctxt->input->cur;
5075 if (*in == '-') {
5076 if (in[1] == '-') {
5077 if (in[2] == '>') {
5078 if (ctxt->input->id != inputid) {
5079 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5080 "comment doesn't start and stop in the same entity\n");
5081 }
5082 SKIP(3);
5083 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5084 (!ctxt->disableSAX)) {
5085 if (buf != NULL)
5086 ctxt->sax->comment(ctxt->userData, buf);
5087 else
5088 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5089 }
5090 if (buf != NULL)
5091 xmlFree(buf);
5092 if (ctxt->instate != XML_PARSER_EOF)
5093 ctxt->instate = state;
5094 return;
5095 }
5096 if (buf != NULL) {
5097 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5098 "Double hyphen within comment: "
5099 "<!--%.50s\n",
5100 buf);
5101 } else
5102 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5103 "Double hyphen within comment\n", NULL);
5104 in++;
5105 ctxt->input->col++;
5106 }
5107 in++;
5108 ctxt->input->col++;
5109 goto get_more;
5110 }
5111 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5112 xmlParseCommentComplex(ctxt, buf, len, size);
5113 ctxt->instate = state;
5114 return;
5115 }
5116
5117
5118 /**
5119 * xmlParsePITarget:
5120 * @ctxt: an XML parser context
5121 *
5122 * parse the name of a PI
5123 *
5124 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5125 *
5126 * Returns the PITarget name or NULL
5127 */
5128
5129 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5130 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5131 const xmlChar *name;
5132
5133 name = xmlParseName(ctxt);
5134 if ((name != NULL) &&
5135 ((name[0] == 'x') || (name[0] == 'X')) &&
5136 ((name[1] == 'm') || (name[1] == 'M')) &&
5137 ((name[2] == 'l') || (name[2] == 'L'))) {
5138 int i;
5139 if ((name[0] == 'x') && (name[1] == 'm') &&
5140 (name[2] == 'l') && (name[3] == 0)) {
5141 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5142 "XML declaration allowed only at the start of the document\n");
5143 return(name);
5144 } else if (name[3] == 0) {
5145 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5146 return(name);
5147 }
5148 for (i = 0;;i++) {
5149 if (xmlW3CPIs[i] == NULL) break;
5150 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5151 return(name);
5152 }
5153 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5154 "xmlParsePITarget: invalid name prefix 'xml'\n",
5155 NULL, NULL);
5156 }
5157 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5158 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5159 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5160 }
5161 return(name);
5162 }
5163
5164 #ifdef LIBXML_CATALOG_ENABLED
5165 /**
5166 * xmlParseCatalogPI:
5167 * @ctxt: an XML parser context
5168 * @catalog: the PI value string
5169 *
5170 * parse an XML Catalog Processing Instruction.
5171 *
5172 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5173 *
5174 * Occurs only if allowed by the user and if happening in the Misc
5175 * part of the document before any doctype informations
5176 * This will add the given catalog to the parsing context in order
5177 * to be used if there is a resolution need further down in the document
5178 */
5179
5180 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5181 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5182 xmlChar *URL = NULL;
5183 const xmlChar *tmp, *base;
5184 xmlChar marker;
5185
5186 tmp = catalog;
5187 while (IS_BLANK_CH(*tmp)) tmp++;
5188 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5189 goto error;
5190 tmp += 7;
5191 while (IS_BLANK_CH(*tmp)) tmp++;
5192 if (*tmp != '=') {
5193 return;
5194 }
5195 tmp++;
5196 while (IS_BLANK_CH(*tmp)) tmp++;
5197 marker = *tmp;
5198 if ((marker != '\'') && (marker != '"'))
5199 goto error;
5200 tmp++;
5201 base = tmp;
5202 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5203 if (*tmp == 0)
5204 goto error;
5205 URL = xmlStrndup(base, tmp - base);
5206 tmp++;
5207 while (IS_BLANK_CH(*tmp)) tmp++;
5208 if (*tmp != 0)
5209 goto error;
5210
5211 if (URL != NULL) {
5212 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5213 xmlFree(URL);
5214 }
5215 return;
5216
5217 error:
5218 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5219 "Catalog PI syntax error: %s\n",
5220 catalog, NULL);
5221 if (URL != NULL)
5222 xmlFree(URL);
5223 }
5224 #endif
5225
5226 /**
5227 * xmlParsePI:
5228 * @ctxt: an XML parser context
5229 *
5230 * parse an XML Processing Instruction.
5231 *
5232 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5233 *
5234 * The processing is transfered to SAX once parsed.
5235 */
5236
5237 void
xmlParsePI(xmlParserCtxtPtr ctxt)5238 xmlParsePI(xmlParserCtxtPtr ctxt) {
5239 xmlChar *buf = NULL;
5240 size_t len = 0;
5241 size_t size = XML_PARSER_BUFFER_SIZE;
5242 int cur, l;
5243 const xmlChar *target;
5244 xmlParserInputState state;
5245 int count = 0;
5246
5247 if ((RAW == '<') && (NXT(1) == '?')) {
5248 xmlParserInputPtr input = ctxt->input;
5249 state = ctxt->instate;
5250 ctxt->instate = XML_PARSER_PI;
5251 /*
5252 * this is a Processing Instruction.
5253 */
5254 SKIP(2);
5255 SHRINK;
5256
5257 /*
5258 * Parse the target name and check for special support like
5259 * namespace.
5260 */
5261 target = xmlParsePITarget(ctxt);
5262 if (target != NULL) {
5263 if ((RAW == '?') && (NXT(1) == '>')) {
5264 if (input != ctxt->input) {
5265 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5266 "PI declaration doesn't start and stop in the same entity\n");
5267 }
5268 SKIP(2);
5269
5270 /*
5271 * SAX: PI detected.
5272 */
5273 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5274 (ctxt->sax->processingInstruction != NULL))
5275 ctxt->sax->processingInstruction(ctxt->userData,
5276 target, NULL);
5277 if (ctxt->instate != XML_PARSER_EOF)
5278 ctxt->instate = state;
5279 return;
5280 }
5281 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5282 if (buf == NULL) {
5283 xmlErrMemory(ctxt, NULL);
5284 ctxt->instate = state;
5285 return;
5286 }
5287 cur = CUR;
5288 if (!IS_BLANK(cur)) {
5289 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5290 "ParsePI: PI %s space expected\n", target);
5291 }
5292 SKIP_BLANKS;
5293 cur = CUR_CHAR(l);
5294 while (IS_CHAR(cur) && /* checked */
5295 ((cur != '?') || (NXT(1) != '>'))) {
5296 if (len + 5 >= size) {
5297 xmlChar *tmp;
5298 size_t new_size = size * 2;
5299 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5300 if (tmp == NULL) {
5301 xmlErrMemory(ctxt, NULL);
5302 xmlFree(buf);
5303 ctxt->instate = state;
5304 return;
5305 }
5306 buf = tmp;
5307 size = new_size;
5308 }
5309 count++;
5310 if (count > 50) {
5311 GROW;
5312 if (ctxt->instate == XML_PARSER_EOF) {
5313 xmlFree(buf);
5314 return;
5315 }
5316 count = 0;
5317 if ((len > XML_MAX_TEXT_LENGTH) &&
5318 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5319 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320 "PI %s too big found", target);
5321 xmlFree(buf);
5322 ctxt->instate = state;
5323 return;
5324 }
5325 }
5326 COPY_BUF(l,buf,len,cur);
5327 NEXTL(l);
5328 cur = CUR_CHAR(l);
5329 if (cur == 0) {
5330 SHRINK;
5331 GROW;
5332 cur = CUR_CHAR(l);
5333 }
5334 }
5335 if ((len > XML_MAX_TEXT_LENGTH) &&
5336 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5337 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5338 "PI %s too big found", target);
5339 xmlFree(buf);
5340 ctxt->instate = state;
5341 return;
5342 }
5343 buf[len] = 0;
5344 if (cur != '?') {
5345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5346 "ParsePI: PI %s never end ...\n", target);
5347 } else {
5348 if (input != ctxt->input) {
5349 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5350 "PI declaration doesn't start and stop in the same entity\n");
5351 }
5352 SKIP(2);
5353
5354 #ifdef LIBXML_CATALOG_ENABLED
5355 if (((state == XML_PARSER_MISC) ||
5356 (state == XML_PARSER_START)) &&
5357 (xmlStrEqual(target, XML_CATALOG_PI))) {
5358 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5359 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5360 (allow == XML_CATA_ALLOW_ALL))
5361 xmlParseCatalogPI(ctxt, buf);
5362 }
5363 #endif
5364
5365
5366 /*
5367 * SAX: PI detected.
5368 */
5369 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5370 (ctxt->sax->processingInstruction != NULL))
5371 ctxt->sax->processingInstruction(ctxt->userData,
5372 target, buf);
5373 }
5374 xmlFree(buf);
5375 } else {
5376 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5377 }
5378 if (ctxt->instate != XML_PARSER_EOF)
5379 ctxt->instate = state;
5380 }
5381 }
5382
5383 /**
5384 * xmlParseNotationDecl:
5385 * @ctxt: an XML parser context
5386 *
5387 * parse a notation declaration
5388 *
5389 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5390 *
5391 * Hence there is actually 3 choices:
5392 * 'PUBLIC' S PubidLiteral
5393 * 'PUBLIC' S PubidLiteral S SystemLiteral
5394 * and 'SYSTEM' S SystemLiteral
5395 *
5396 * See the NOTE on xmlParseExternalID().
5397 */
5398
5399 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5400 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5401 const xmlChar *name;
5402 xmlChar *Pubid;
5403 xmlChar *Systemid;
5404
5405 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5406 xmlParserInputPtr input = ctxt->input;
5407 SHRINK;
5408 SKIP(10);
5409 if (!IS_BLANK_CH(CUR)) {
5410 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5411 "Space required after '<!NOTATION'\n");
5412 return;
5413 }
5414 SKIP_BLANKS;
5415
5416 name = xmlParseName(ctxt);
5417 if (name == NULL) {
5418 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5419 return;
5420 }
5421 if (!IS_BLANK_CH(CUR)) {
5422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423 "Space required after the NOTATION name'\n");
5424 return;
5425 }
5426 if (xmlStrchr(name, ':') != NULL) {
5427 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5428 "colons are forbidden from notation names '%s'\n",
5429 name, NULL, NULL);
5430 }
5431 SKIP_BLANKS;
5432
5433 /*
5434 * Parse the IDs.
5435 */
5436 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5437 SKIP_BLANKS;
5438
5439 if (RAW == '>') {
5440 if (input != ctxt->input) {
5441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442 "Notation declaration doesn't start and stop in the same entity\n");
5443 }
5444 NEXT;
5445 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5446 (ctxt->sax->notationDecl != NULL))
5447 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5448 } else {
5449 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5450 }
5451 if (Systemid != NULL) xmlFree(Systemid);
5452 if (Pubid != NULL) xmlFree(Pubid);
5453 }
5454 }
5455
5456 /**
5457 * xmlParseEntityDecl:
5458 * @ctxt: an XML parser context
5459 *
5460 * parse <!ENTITY declarations
5461 *
5462 * [70] EntityDecl ::= GEDecl | PEDecl
5463 *
5464 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5465 *
5466 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5467 *
5468 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5469 *
5470 * [74] PEDef ::= EntityValue | ExternalID
5471 *
5472 * [76] NDataDecl ::= S 'NDATA' S Name
5473 *
5474 * [ VC: Notation Declared ]
5475 * The Name must match the declared name of a notation.
5476 */
5477
5478 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5479 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5480 const xmlChar *name = NULL;
5481 xmlChar *value = NULL;
5482 xmlChar *URI = NULL, *literal = NULL;
5483 const xmlChar *ndata = NULL;
5484 int isParameter = 0;
5485 xmlChar *orig = NULL;
5486 int skipped;
5487
5488 /* GROW; done in the caller */
5489 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490 xmlParserInputPtr input = ctxt->input;
5491 SHRINK;
5492 SKIP(8);
5493 skipped = SKIP_BLANKS;
5494 if (skipped == 0) {
5495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5496 "Space required after '<!ENTITY'\n");
5497 }
5498
5499 if (RAW == '%') {
5500 NEXT;
5501 skipped = SKIP_BLANKS;
5502 if (skipped == 0) {
5503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5504 "Space required after '%'\n");
5505 }
5506 isParameter = 1;
5507 }
5508
5509 name = xmlParseName(ctxt);
5510 if (name == NULL) {
5511 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5512 "xmlParseEntityDecl: no name\n");
5513 return;
5514 }
5515 if (xmlStrchr(name, ':') != NULL) {
5516 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5517 "colons are forbidden from entities names '%s'\n",
5518 name, NULL, NULL);
5519 }
5520 skipped = SKIP_BLANKS;
5521 if (skipped == 0) {
5522 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5523 "Space required after the entity name\n");
5524 }
5525
5526 ctxt->instate = XML_PARSER_ENTITY_DECL;
5527 /*
5528 * handle the various case of definitions...
5529 */
5530 if (isParameter) {
5531 if ((RAW == '"') || (RAW == '\'')) {
5532 value = xmlParseEntityValue(ctxt, &orig);
5533 if (value) {
5534 if ((ctxt->sax != NULL) &&
5535 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536 ctxt->sax->entityDecl(ctxt->userData, name,
5537 XML_INTERNAL_PARAMETER_ENTITY,
5538 NULL, NULL, value);
5539 }
5540 } else {
5541 URI = xmlParseExternalID(ctxt, &literal, 1);
5542 if ((URI == NULL) && (literal == NULL)) {
5543 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5544 }
5545 if (URI) {
5546 xmlURIPtr uri;
5547
5548 uri = xmlParseURI((const char *) URI);
5549 if (uri == NULL) {
5550 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5551 "Invalid URI: %s\n", URI);
5552 /*
5553 * This really ought to be a well formedness error
5554 * but the XML Core WG decided otherwise c.f. issue
5555 * E26 of the XML erratas.
5556 */
5557 } else {
5558 if (uri->fragment != NULL) {
5559 /*
5560 * Okay this is foolish to block those but not
5561 * invalid URIs.
5562 */
5563 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5564 } else {
5565 if ((ctxt->sax != NULL) &&
5566 (!ctxt->disableSAX) &&
5567 (ctxt->sax->entityDecl != NULL))
5568 ctxt->sax->entityDecl(ctxt->userData, name,
5569 XML_EXTERNAL_PARAMETER_ENTITY,
5570 literal, URI, NULL);
5571 }
5572 xmlFreeURI(uri);
5573 }
5574 }
5575 }
5576 } else {
5577 if ((RAW == '"') || (RAW == '\'')) {
5578 value = xmlParseEntityValue(ctxt, &orig);
5579 if ((ctxt->sax != NULL) &&
5580 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5581 ctxt->sax->entityDecl(ctxt->userData, name,
5582 XML_INTERNAL_GENERAL_ENTITY,
5583 NULL, NULL, value);
5584 /*
5585 * For expat compatibility in SAX mode.
5586 */
5587 if ((ctxt->myDoc == NULL) ||
5588 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5589 if (ctxt->myDoc == NULL) {
5590 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5591 if (ctxt->myDoc == NULL) {
5592 xmlErrMemory(ctxt, "New Doc failed");
5593 return;
5594 }
5595 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5596 }
5597 if (ctxt->myDoc->intSubset == NULL)
5598 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5599 BAD_CAST "fake", NULL, NULL);
5600
5601 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5602 NULL, NULL, value);
5603 }
5604 } else {
5605 URI = xmlParseExternalID(ctxt, &literal, 1);
5606 if ((URI == NULL) && (literal == NULL)) {
5607 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5608 }
5609 if (URI) {
5610 xmlURIPtr uri;
5611
5612 uri = xmlParseURI((const char *)URI);
5613 if (uri == NULL) {
5614 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5615 "Invalid URI: %s\n", URI);
5616 /*
5617 * This really ought to be a well formedness error
5618 * but the XML Core WG decided otherwise c.f. issue
5619 * E26 of the XML erratas.
5620 */
5621 } else {
5622 if (uri->fragment != NULL) {
5623 /*
5624 * Okay this is foolish to block those but not
5625 * invalid URIs.
5626 */
5627 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5628 }
5629 xmlFreeURI(uri);
5630 }
5631 }
5632 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5633 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634 "Space required before 'NDATA'\n");
5635 }
5636 SKIP_BLANKS;
5637 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5638 SKIP(5);
5639 if (!IS_BLANK_CH(CUR)) {
5640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5641 "Space required after 'NDATA'\n");
5642 }
5643 SKIP_BLANKS;
5644 ndata = xmlParseName(ctxt);
5645 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5646 (ctxt->sax->unparsedEntityDecl != NULL))
5647 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5648 literal, URI, ndata);
5649 } else {
5650 if ((ctxt->sax != NULL) &&
5651 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5652 ctxt->sax->entityDecl(ctxt->userData, name,
5653 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5654 literal, URI, NULL);
5655 /*
5656 * For expat compatibility in SAX mode.
5657 * assuming the entity repalcement was asked for
5658 */
5659 if ((ctxt->replaceEntities != 0) &&
5660 ((ctxt->myDoc == NULL) ||
5661 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5662 if (ctxt->myDoc == NULL) {
5663 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5664 if (ctxt->myDoc == NULL) {
5665 xmlErrMemory(ctxt, "New Doc failed");
5666 return;
5667 }
5668 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5669 }
5670
5671 if (ctxt->myDoc->intSubset == NULL)
5672 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5673 BAD_CAST "fake", NULL, NULL);
5674 xmlSAX2EntityDecl(ctxt, name,
5675 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5676 literal, URI, NULL);
5677 }
5678 }
5679 }
5680 }
5681 if (ctxt->instate == XML_PARSER_EOF)
5682 return;
5683 SKIP_BLANKS;
5684 if (RAW != '>') {
5685 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5686 "xmlParseEntityDecl: entity %s not terminated\n", name);
5687 xmlHaltParser(ctxt);
5688 } else {
5689 if (input != ctxt->input) {
5690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5691 "Entity declaration doesn't start and stop in the same entity\n");
5692 }
5693 NEXT;
5694 }
5695 if (orig != NULL) {
5696 /*
5697 * Ugly mechanism to save the raw entity value.
5698 */
5699 xmlEntityPtr cur = NULL;
5700
5701 if (isParameter) {
5702 if ((ctxt->sax != NULL) &&
5703 (ctxt->sax->getParameterEntity != NULL))
5704 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5705 } else {
5706 if ((ctxt->sax != NULL) &&
5707 (ctxt->sax->getEntity != NULL))
5708 cur = ctxt->sax->getEntity(ctxt->userData, name);
5709 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5710 cur = xmlSAX2GetEntity(ctxt, name);
5711 }
5712 }
5713 if (cur != NULL) {
5714 if (cur->orig != NULL)
5715 xmlFree(orig);
5716 else
5717 cur->orig = orig;
5718 } else
5719 xmlFree(orig);
5720 }
5721 if (value != NULL) xmlFree(value);
5722 if (URI != NULL) xmlFree(URI);
5723 if (literal != NULL) xmlFree(literal);
5724 }
5725 }
5726
5727 /**
5728 * xmlParseDefaultDecl:
5729 * @ctxt: an XML parser context
5730 * @value: Receive a possible fixed default value for the attribute
5731 *
5732 * Parse an attribute default declaration
5733 *
5734 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5735 *
5736 * [ VC: Required Attribute ]
5737 * if the default declaration is the keyword #REQUIRED, then the
5738 * attribute must be specified for all elements of the type in the
5739 * attribute-list declaration.
5740 *
5741 * [ VC: Attribute Default Legal ]
5742 * The declared default value must meet the lexical constraints of
5743 * the declared attribute type c.f. xmlValidateAttributeDecl()
5744 *
5745 * [ VC: Fixed Attribute Default ]
5746 * if an attribute has a default value declared with the #FIXED
5747 * keyword, instances of that attribute must match the default value.
5748 *
5749 * [ WFC: No < in Attribute Values ]
5750 * handled in xmlParseAttValue()
5751 *
5752 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5753 * or XML_ATTRIBUTE_FIXED.
5754 */
5755
5756 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5757 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5758 int val;
5759 xmlChar *ret;
5760
5761 *value = NULL;
5762 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5763 SKIP(9);
5764 return(XML_ATTRIBUTE_REQUIRED);
5765 }
5766 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5767 SKIP(8);
5768 return(XML_ATTRIBUTE_IMPLIED);
5769 }
5770 val = XML_ATTRIBUTE_NONE;
5771 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5772 SKIP(6);
5773 val = XML_ATTRIBUTE_FIXED;
5774 if (!IS_BLANK_CH(CUR)) {
5775 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5776 "Space required after '#FIXED'\n");
5777 }
5778 SKIP_BLANKS;
5779 }
5780 ret = xmlParseAttValue(ctxt);
5781 ctxt->instate = XML_PARSER_DTD;
5782 if (ret == NULL) {
5783 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5784 "Attribute default value declaration error\n");
5785 } else
5786 *value = ret;
5787 return(val);
5788 }
5789
5790 /**
5791 * xmlParseNotationType:
5792 * @ctxt: an XML parser context
5793 *
5794 * parse an Notation attribute type.
5795 *
5796 * Note: the leading 'NOTATION' S part has already being parsed...
5797 *
5798 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5799 *
5800 * [ VC: Notation Attributes ]
5801 * Values of this type must match one of the notation names included
5802 * in the declaration; all notation names in the declaration must be declared.
5803 *
5804 * Returns: the notation attribute tree built while parsing
5805 */
5806
5807 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5808 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5809 const xmlChar *name;
5810 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5811
5812 if (RAW != '(') {
5813 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5814 return(NULL);
5815 }
5816 SHRINK;
5817 do {
5818 NEXT;
5819 SKIP_BLANKS;
5820 name = xmlParseName(ctxt);
5821 if (name == NULL) {
5822 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5823 "Name expected in NOTATION declaration\n");
5824 xmlFreeEnumeration(ret);
5825 return(NULL);
5826 }
5827 tmp = ret;
5828 while (tmp != NULL) {
5829 if (xmlStrEqual(name, tmp->name)) {
5830 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5831 "standalone: attribute notation value token %s duplicated\n",
5832 name, NULL);
5833 if (!xmlDictOwns(ctxt->dict, name))
5834 xmlFree((xmlChar *) name);
5835 break;
5836 }
5837 tmp = tmp->next;
5838 }
5839 if (tmp == NULL) {
5840 cur = xmlCreateEnumeration(name);
5841 if (cur == NULL) {
5842 xmlFreeEnumeration(ret);
5843 return(NULL);
5844 }
5845 if (last == NULL) ret = last = cur;
5846 else {
5847 last->next = cur;
5848 last = cur;
5849 }
5850 }
5851 SKIP_BLANKS;
5852 } while (RAW == '|');
5853 if (RAW != ')') {
5854 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5855 xmlFreeEnumeration(ret);
5856 return(NULL);
5857 }
5858 NEXT;
5859 return(ret);
5860 }
5861
5862 /**
5863 * xmlParseEnumerationType:
5864 * @ctxt: an XML parser context
5865 *
5866 * parse an Enumeration attribute type.
5867 *
5868 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5869 *
5870 * [ VC: Enumeration ]
5871 * Values of this type must match one of the Nmtoken tokens in
5872 * the declaration
5873 *
5874 * Returns: the enumeration attribute tree built while parsing
5875 */
5876
5877 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5878 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5879 xmlChar *name;
5880 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5881
5882 if (RAW != '(') {
5883 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5884 return(NULL);
5885 }
5886 SHRINK;
5887 do {
5888 NEXT;
5889 SKIP_BLANKS;
5890 name = xmlParseNmtoken(ctxt);
5891 if (name == NULL) {
5892 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5893 return(ret);
5894 }
5895 tmp = ret;
5896 while (tmp != NULL) {
5897 if (xmlStrEqual(name, tmp->name)) {
5898 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5899 "standalone: attribute enumeration value token %s duplicated\n",
5900 name, NULL);
5901 if (!xmlDictOwns(ctxt->dict, name))
5902 xmlFree(name);
5903 break;
5904 }
5905 tmp = tmp->next;
5906 }
5907 if (tmp == NULL) {
5908 cur = xmlCreateEnumeration(name);
5909 if (!xmlDictOwns(ctxt->dict, name))
5910 xmlFree(name);
5911 if (cur == NULL) {
5912 xmlFreeEnumeration(ret);
5913 return(NULL);
5914 }
5915 if (last == NULL) ret = last = cur;
5916 else {
5917 last->next = cur;
5918 last = cur;
5919 }
5920 }
5921 SKIP_BLANKS;
5922 } while (RAW == '|');
5923 if (RAW != ')') {
5924 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5925 return(ret);
5926 }
5927 NEXT;
5928 return(ret);
5929 }
5930
5931 /**
5932 * xmlParseEnumeratedType:
5933 * @ctxt: an XML parser context
5934 * @tree: the enumeration tree built while parsing
5935 *
5936 * parse an Enumerated attribute type.
5937 *
5938 * [57] EnumeratedType ::= NotationType | Enumeration
5939 *
5940 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5941 *
5942 *
5943 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5944 */
5945
5946 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5947 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5948 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5949 SKIP(8);
5950 if (!IS_BLANK_CH(CUR)) {
5951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5952 "Space required after 'NOTATION'\n");
5953 return(0);
5954 }
5955 SKIP_BLANKS;
5956 *tree = xmlParseNotationType(ctxt);
5957 if (*tree == NULL) return(0);
5958 return(XML_ATTRIBUTE_NOTATION);
5959 }
5960 *tree = xmlParseEnumerationType(ctxt);
5961 if (*tree == NULL) return(0);
5962 return(XML_ATTRIBUTE_ENUMERATION);
5963 }
5964
5965 /**
5966 * xmlParseAttributeType:
5967 * @ctxt: an XML parser context
5968 * @tree: the enumeration tree built while parsing
5969 *
5970 * parse the Attribute list def for an element
5971 *
5972 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5973 *
5974 * [55] StringType ::= 'CDATA'
5975 *
5976 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5977 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5978 *
5979 * Validity constraints for attribute values syntax are checked in
5980 * xmlValidateAttributeValue()
5981 *
5982 * [ VC: ID ]
5983 * Values of type ID must match the Name production. A name must not
5984 * appear more than once in an XML document as a value of this type;
5985 * i.e., ID values must uniquely identify the elements which bear them.
5986 *
5987 * [ VC: One ID per Element Type ]
5988 * No element type may have more than one ID attribute specified.
5989 *
5990 * [ VC: ID Attribute Default ]
5991 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5992 *
5993 * [ VC: IDREF ]
5994 * Values of type IDREF must match the Name production, and values
5995 * of type IDREFS must match Names; each IDREF Name must match the value
5996 * of an ID attribute on some element in the XML document; i.e. IDREF
5997 * values must match the value of some ID attribute.
5998 *
5999 * [ VC: Entity Name ]
6000 * Values of type ENTITY must match the Name production, values
6001 * of type ENTITIES must match Names; each Entity Name must match the
6002 * name of an unparsed entity declared in the DTD.
6003 *
6004 * [ VC: Name Token ]
6005 * Values of type NMTOKEN must match the Nmtoken production; values
6006 * of type NMTOKENS must match Nmtokens.
6007 *
6008 * Returns the attribute type
6009 */
6010 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6011 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6012 SHRINK;
6013 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6014 SKIP(5);
6015 return(XML_ATTRIBUTE_CDATA);
6016 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6017 SKIP(6);
6018 return(XML_ATTRIBUTE_IDREFS);
6019 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6020 SKIP(5);
6021 return(XML_ATTRIBUTE_IDREF);
6022 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6023 SKIP(2);
6024 return(XML_ATTRIBUTE_ID);
6025 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6026 SKIP(6);
6027 return(XML_ATTRIBUTE_ENTITY);
6028 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6029 SKIP(8);
6030 return(XML_ATTRIBUTE_ENTITIES);
6031 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6032 SKIP(8);
6033 return(XML_ATTRIBUTE_NMTOKENS);
6034 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6035 SKIP(7);
6036 return(XML_ATTRIBUTE_NMTOKEN);
6037 }
6038 return(xmlParseEnumeratedType(ctxt, tree));
6039 }
6040
6041 /**
6042 * xmlParseAttributeListDecl:
6043 * @ctxt: an XML parser context
6044 *
6045 * : parse the Attribute list def for an element
6046 *
6047 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6048 *
6049 * [53] AttDef ::= S Name S AttType S DefaultDecl
6050 *
6051 */
6052 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6053 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6054 const xmlChar *elemName;
6055 const xmlChar *attrName;
6056 xmlEnumerationPtr tree;
6057
6058 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6059 xmlParserInputPtr input = ctxt->input;
6060
6061 SKIP(9);
6062 if (!IS_BLANK_CH(CUR)) {
6063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6064 "Space required after '<!ATTLIST'\n");
6065 }
6066 SKIP_BLANKS;
6067 elemName = xmlParseName(ctxt);
6068 if (elemName == NULL) {
6069 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6070 "ATTLIST: no name for Element\n");
6071 return;
6072 }
6073 SKIP_BLANKS;
6074 GROW;
6075 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6076 const xmlChar *check = CUR_PTR;
6077 int type;
6078 int def;
6079 xmlChar *defaultValue = NULL;
6080
6081 GROW;
6082 tree = NULL;
6083 attrName = xmlParseName(ctxt);
6084 if (attrName == NULL) {
6085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6086 "ATTLIST: no name for Attribute\n");
6087 break;
6088 }
6089 GROW;
6090 if (!IS_BLANK_CH(CUR)) {
6091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6092 "Space required after the attribute name\n");
6093 break;
6094 }
6095 SKIP_BLANKS;
6096
6097 type = xmlParseAttributeType(ctxt, &tree);
6098 if (type <= 0) {
6099 break;
6100 }
6101
6102 GROW;
6103 if (!IS_BLANK_CH(CUR)) {
6104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6105 "Space required after the attribute type\n");
6106 if (tree != NULL)
6107 xmlFreeEnumeration(tree);
6108 break;
6109 }
6110 SKIP_BLANKS;
6111
6112 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6113 if (def <= 0) {
6114 if (defaultValue != NULL)
6115 xmlFree(defaultValue);
6116 if (tree != NULL)
6117 xmlFreeEnumeration(tree);
6118 break;
6119 }
6120 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6121 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6122
6123 GROW;
6124 if (RAW != '>') {
6125 if (!IS_BLANK_CH(CUR)) {
6126 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6127 "Space required after the attribute default value\n");
6128 if (defaultValue != NULL)
6129 xmlFree(defaultValue);
6130 if (tree != NULL)
6131 xmlFreeEnumeration(tree);
6132 break;
6133 }
6134 SKIP_BLANKS;
6135 }
6136 if (check == CUR_PTR) {
6137 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6138 "in xmlParseAttributeListDecl\n");
6139 if (defaultValue != NULL)
6140 xmlFree(defaultValue);
6141 if (tree != NULL)
6142 xmlFreeEnumeration(tree);
6143 break;
6144 }
6145 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6146 (ctxt->sax->attributeDecl != NULL))
6147 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6148 type, def, defaultValue, tree);
6149 else if (tree != NULL)
6150 xmlFreeEnumeration(tree);
6151
6152 if ((ctxt->sax2) && (defaultValue != NULL) &&
6153 (def != XML_ATTRIBUTE_IMPLIED) &&
6154 (def != XML_ATTRIBUTE_REQUIRED)) {
6155 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6156 }
6157 if (ctxt->sax2) {
6158 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6159 }
6160 if (defaultValue != NULL)
6161 xmlFree(defaultValue);
6162 GROW;
6163 }
6164 if (RAW == '>') {
6165 if (input != ctxt->input) {
6166 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6167 "Attribute list declaration doesn't start and stop in the same entity\n",
6168 NULL, NULL);
6169 }
6170 NEXT;
6171 }
6172 }
6173 }
6174
6175 /**
6176 * xmlParseElementMixedContentDecl:
6177 * @ctxt: an XML parser context
6178 * @inputchk: the input used for the current entity, needed for boundary checks
6179 *
6180 * parse the declaration for a Mixed Element content
6181 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6182 *
6183 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6184 * '(' S? '#PCDATA' S? ')'
6185 *
6186 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6187 *
6188 * [ VC: No Duplicate Types ]
6189 * The same name must not appear more than once in a single
6190 * mixed-content declaration.
6191 *
6192 * returns: the list of the xmlElementContentPtr describing the element choices
6193 */
6194 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6195 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6196 xmlElementContentPtr ret = NULL, cur = NULL, n;
6197 const xmlChar *elem = NULL;
6198
6199 GROW;
6200 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6201 SKIP(7);
6202 SKIP_BLANKS;
6203 SHRINK;
6204 if (RAW == ')') {
6205 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6206 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6207 "Element content declaration doesn't start and stop in the same entity\n",
6208 NULL, NULL);
6209 }
6210 NEXT;
6211 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6212 if (ret == NULL)
6213 return(NULL);
6214 if (RAW == '*') {
6215 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6216 NEXT;
6217 }
6218 return(ret);
6219 }
6220 if ((RAW == '(') || (RAW == '|')) {
6221 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6222 if (ret == NULL) return(NULL);
6223 }
6224 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6225 NEXT;
6226 if (elem == NULL) {
6227 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6228 if (ret == NULL) return(NULL);
6229 ret->c1 = cur;
6230 if (cur != NULL)
6231 cur->parent = ret;
6232 cur = ret;
6233 } else {
6234 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6235 if (n == NULL) return(NULL);
6236 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6237 if (n->c1 != NULL)
6238 n->c1->parent = n;
6239 cur->c2 = n;
6240 if (n != NULL)
6241 n->parent = cur;
6242 cur = n;
6243 }
6244 SKIP_BLANKS;
6245 elem = xmlParseName(ctxt);
6246 if (elem == NULL) {
6247 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6248 "xmlParseElementMixedContentDecl : Name expected\n");
6249 xmlFreeDocElementContent(ctxt->myDoc, cur);
6250 return(NULL);
6251 }
6252 SKIP_BLANKS;
6253 GROW;
6254 }
6255 if ((RAW == ')') && (NXT(1) == '*')) {
6256 if (elem != NULL) {
6257 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6258 XML_ELEMENT_CONTENT_ELEMENT);
6259 if (cur->c2 != NULL)
6260 cur->c2->parent = cur;
6261 }
6262 if (ret != NULL)
6263 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6264 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6265 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6266 "Element content declaration doesn't start and stop in the same entity\n",
6267 NULL, NULL);
6268 }
6269 SKIP(2);
6270 } else {
6271 xmlFreeDocElementContent(ctxt->myDoc, ret);
6272 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6273 return(NULL);
6274 }
6275
6276 } else {
6277 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6278 }
6279 return(ret);
6280 }
6281
6282 /**
6283 * xmlParseElementChildrenContentDeclPriv:
6284 * @ctxt: an XML parser context
6285 * @inputchk: the input used for the current entity, needed for boundary checks
6286 * @depth: the level of recursion
6287 *
6288 * parse the declaration for a Mixed Element content
6289 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6290 *
6291 *
6292 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6293 *
6294 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6295 *
6296 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6297 *
6298 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6299 *
6300 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6301 * TODO Parameter-entity replacement text must be properly nested
6302 * with parenthesized groups. That is to say, if either of the
6303 * opening or closing parentheses in a choice, seq, or Mixed
6304 * construct is contained in the replacement text for a parameter
6305 * entity, both must be contained in the same replacement text. For
6306 * interoperability, if a parameter-entity reference appears in a
6307 * choice, seq, or Mixed construct, its replacement text should not
6308 * be empty, and neither the first nor last non-blank character of
6309 * the replacement text should be a connector (| or ,).
6310 *
6311 * Returns the tree of xmlElementContentPtr describing the element
6312 * hierarchy.
6313 */
6314 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6315 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6316 int depth) {
6317 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6318 const xmlChar *elem;
6319 xmlChar type = 0;
6320
6321 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6322 (depth > 2048)) {
6323 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6324 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6325 depth);
6326 return(NULL);
6327 }
6328 SKIP_BLANKS;
6329 GROW;
6330 if (RAW == '(') {
6331 int inputid = ctxt->input->id;
6332
6333 /* Recurse on first child */
6334 NEXT;
6335 SKIP_BLANKS;
6336 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6337 depth + 1);
6338 SKIP_BLANKS;
6339 GROW;
6340 } else {
6341 elem = xmlParseName(ctxt);
6342 if (elem == NULL) {
6343 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6344 return(NULL);
6345 }
6346 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6347 if (cur == NULL) {
6348 xmlErrMemory(ctxt, NULL);
6349 return(NULL);
6350 }
6351 GROW;
6352 if (RAW == '?') {
6353 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6354 NEXT;
6355 } else if (RAW == '*') {
6356 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6357 NEXT;
6358 } else if (RAW == '+') {
6359 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6360 NEXT;
6361 } else {
6362 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6363 }
6364 GROW;
6365 }
6366 SKIP_BLANKS;
6367 SHRINK;
6368 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6369 /*
6370 * Each loop we parse one separator and one element.
6371 */
6372 if (RAW == ',') {
6373 if (type == 0) type = CUR;
6374
6375 /*
6376 * Detect "Name | Name , Name" error
6377 */
6378 else if (type != CUR) {
6379 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381 type);
6382 if ((last != NULL) && (last != ret))
6383 xmlFreeDocElementContent(ctxt->myDoc, last);
6384 if (ret != NULL)
6385 xmlFreeDocElementContent(ctxt->myDoc, ret);
6386 return(NULL);
6387 }
6388 NEXT;
6389
6390 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6391 if (op == NULL) {
6392 if ((last != NULL) && (last != ret))
6393 xmlFreeDocElementContent(ctxt->myDoc, last);
6394 xmlFreeDocElementContent(ctxt->myDoc, ret);
6395 return(NULL);
6396 }
6397 if (last == NULL) {
6398 op->c1 = ret;
6399 if (ret != NULL)
6400 ret->parent = op;
6401 ret = cur = op;
6402 } else {
6403 cur->c2 = op;
6404 if (op != NULL)
6405 op->parent = cur;
6406 op->c1 = last;
6407 if (last != NULL)
6408 last->parent = op;
6409 cur =op;
6410 last = NULL;
6411 }
6412 } else if (RAW == '|') {
6413 if (type == 0) type = CUR;
6414
6415 /*
6416 * Detect "Name , Name | Name" error
6417 */
6418 else if (type != CUR) {
6419 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6420 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6421 type);
6422 if ((last != NULL) && (last != ret))
6423 xmlFreeDocElementContent(ctxt->myDoc, last);
6424 if (ret != NULL)
6425 xmlFreeDocElementContent(ctxt->myDoc, ret);
6426 return(NULL);
6427 }
6428 NEXT;
6429
6430 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6431 if (op == NULL) {
6432 if ((last != NULL) && (last != ret))
6433 xmlFreeDocElementContent(ctxt->myDoc, last);
6434 if (ret != NULL)
6435 xmlFreeDocElementContent(ctxt->myDoc, ret);
6436 return(NULL);
6437 }
6438 if (last == NULL) {
6439 op->c1 = ret;
6440 if (ret != NULL)
6441 ret->parent = op;
6442 ret = cur = op;
6443 } else {
6444 cur->c2 = op;
6445 if (op != NULL)
6446 op->parent = cur;
6447 op->c1 = last;
6448 if (last != NULL)
6449 last->parent = op;
6450 cur =op;
6451 last = NULL;
6452 }
6453 } else {
6454 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6455 if ((last != NULL) && (last != ret))
6456 xmlFreeDocElementContent(ctxt->myDoc, last);
6457 if (ret != NULL)
6458 xmlFreeDocElementContent(ctxt->myDoc, ret);
6459 return(NULL);
6460 }
6461 GROW;
6462 SKIP_BLANKS;
6463 GROW;
6464 if (RAW == '(') {
6465 int inputid = ctxt->input->id;
6466 /* Recurse on second child */
6467 NEXT;
6468 SKIP_BLANKS;
6469 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6470 depth + 1);
6471 SKIP_BLANKS;
6472 } else {
6473 elem = xmlParseName(ctxt);
6474 if (elem == NULL) {
6475 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6476 if (ret != NULL)
6477 xmlFreeDocElementContent(ctxt->myDoc, ret);
6478 return(NULL);
6479 }
6480 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6481 if (last == NULL) {
6482 if (ret != NULL)
6483 xmlFreeDocElementContent(ctxt->myDoc, ret);
6484 return(NULL);
6485 }
6486 if (RAW == '?') {
6487 last->ocur = XML_ELEMENT_CONTENT_OPT;
6488 NEXT;
6489 } else if (RAW == '*') {
6490 last->ocur = XML_ELEMENT_CONTENT_MULT;
6491 NEXT;
6492 } else if (RAW == '+') {
6493 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6494 NEXT;
6495 } else {
6496 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6497 }
6498 }
6499 SKIP_BLANKS;
6500 GROW;
6501 }
6502 if ((cur != NULL) && (last != NULL)) {
6503 cur->c2 = last;
6504 if (last != NULL)
6505 last->parent = cur;
6506 }
6507 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6508 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6509 "Element content declaration doesn't start and stop in the same entity\n",
6510 NULL, NULL);
6511 }
6512 NEXT;
6513 if (RAW == '?') {
6514 if (ret != NULL) {
6515 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6516 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6517 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6518 else
6519 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6520 }
6521 NEXT;
6522 } else if (RAW == '*') {
6523 if (ret != NULL) {
6524 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6525 cur = ret;
6526 /*
6527 * Some normalization:
6528 * (a | b* | c?)* == (a | b | c)*
6529 */
6530 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6531 if ((cur->c1 != NULL) &&
6532 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6533 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6534 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6535 if ((cur->c2 != NULL) &&
6536 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6537 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6538 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6539 cur = cur->c2;
6540 }
6541 }
6542 NEXT;
6543 } else if (RAW == '+') {
6544 if (ret != NULL) {
6545 int found = 0;
6546
6547 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6549 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6550 else
6551 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6552 /*
6553 * Some normalization:
6554 * (a | b*)+ == (a | b)*
6555 * (a | b?)+ == (a | b)*
6556 */
6557 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6558 if ((cur->c1 != NULL) &&
6559 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6560 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6561 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6562 found = 1;
6563 }
6564 if ((cur->c2 != NULL) &&
6565 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6566 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6567 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6568 found = 1;
6569 }
6570 cur = cur->c2;
6571 }
6572 if (found)
6573 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6574 }
6575 NEXT;
6576 }
6577 return(ret);
6578 }
6579
6580 /**
6581 * xmlParseElementChildrenContentDecl:
6582 * @ctxt: an XML parser context
6583 * @inputchk: the input used for the current entity, needed for boundary checks
6584 *
6585 * parse the declaration for a Mixed Element content
6586 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6587 *
6588 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6589 *
6590 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6591 *
6592 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6593 *
6594 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6595 *
6596 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6597 * TODO Parameter-entity replacement text must be properly nested
6598 * with parenthesized groups. That is to say, if either of the
6599 * opening or closing parentheses in a choice, seq, or Mixed
6600 * construct is contained in the replacement text for a parameter
6601 * entity, both must be contained in the same replacement text. For
6602 * interoperability, if a parameter-entity reference appears in a
6603 * choice, seq, or Mixed construct, its replacement text should not
6604 * be empty, and neither the first nor last non-blank character of
6605 * the replacement text should be a connector (| or ,).
6606 *
6607 * Returns the tree of xmlElementContentPtr describing the element
6608 * hierarchy.
6609 */
6610 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6611 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6612 /* stub left for API/ABI compat */
6613 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6614 }
6615
6616 /**
6617 * xmlParseElementContentDecl:
6618 * @ctxt: an XML parser context
6619 * @name: the name of the element being defined.
6620 * @result: the Element Content pointer will be stored here if any
6621 *
6622 * parse the declaration for an Element content either Mixed or Children,
6623 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6624 *
6625 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6626 *
6627 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6628 */
6629
6630 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6631 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6632 xmlElementContentPtr *result) {
6633
6634 xmlElementContentPtr tree = NULL;
6635 int inputid = ctxt->input->id;
6636 int res;
6637
6638 *result = NULL;
6639
6640 if (RAW != '(') {
6641 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642 "xmlParseElementContentDecl : %s '(' expected\n", name);
6643 return(-1);
6644 }
6645 NEXT;
6646 GROW;
6647 if (ctxt->instate == XML_PARSER_EOF)
6648 return(-1);
6649 SKIP_BLANKS;
6650 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6651 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6652 res = XML_ELEMENT_TYPE_MIXED;
6653 } else {
6654 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6655 res = XML_ELEMENT_TYPE_ELEMENT;
6656 }
6657 SKIP_BLANKS;
6658 *result = tree;
6659 return(res);
6660 }
6661
6662 /**
6663 * xmlParseElementDecl:
6664 * @ctxt: an XML parser context
6665 *
6666 * parse an Element declaration.
6667 *
6668 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6669 *
6670 * [ VC: Unique Element Type Declaration ]
6671 * No element type may be declared more than once
6672 *
6673 * Returns the type of the element, or -1 in case of error
6674 */
6675 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6676 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6677 const xmlChar *name;
6678 int ret = -1;
6679 xmlElementContentPtr content = NULL;
6680
6681 /* GROW; done in the caller */
6682 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6683 xmlParserInputPtr input = ctxt->input;
6684
6685 SKIP(9);
6686 if (!IS_BLANK_CH(CUR)) {
6687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 "Space required after 'ELEMENT'\n");
6689 }
6690 SKIP_BLANKS;
6691 name = xmlParseName(ctxt);
6692 if (name == NULL) {
6693 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6694 "xmlParseElementDecl: no name for Element\n");
6695 return(-1);
6696 }
6697 while ((RAW == 0) && (ctxt->inputNr > 1))
6698 xmlPopInput(ctxt);
6699 if (!IS_BLANK_CH(CUR)) {
6700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6701 "Space required after the element name\n");
6702 }
6703 SKIP_BLANKS;
6704 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6705 SKIP(5);
6706 /*
6707 * Element must always be empty.
6708 */
6709 ret = XML_ELEMENT_TYPE_EMPTY;
6710 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6711 (NXT(2) == 'Y')) {
6712 SKIP(3);
6713 /*
6714 * Element is a generic container.
6715 */
6716 ret = XML_ELEMENT_TYPE_ANY;
6717 } else if (RAW == '(') {
6718 ret = xmlParseElementContentDecl(ctxt, name, &content);
6719 } else {
6720 /*
6721 * [ WFC: PEs in Internal Subset ] error handling.
6722 */
6723 if ((RAW == '%') && (ctxt->external == 0) &&
6724 (ctxt->inputNr == 1)) {
6725 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6726 "PEReference: forbidden within markup decl in internal subset\n");
6727 } else {
6728 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6729 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6730 }
6731 return(-1);
6732 }
6733
6734 SKIP_BLANKS;
6735 /*
6736 * Pop-up of finished entities.
6737 */
6738 while ((RAW == 0) && (ctxt->inputNr > 1))
6739 xmlPopInput(ctxt);
6740 SKIP_BLANKS;
6741
6742 if (RAW != '>') {
6743 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6744 if (content != NULL) {
6745 xmlFreeDocElementContent(ctxt->myDoc, content);
6746 }
6747 } else {
6748 if (input != ctxt->input) {
6749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 "Element declaration doesn't start and stop in the same entity\n");
6751 }
6752
6753 NEXT;
6754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6755 (ctxt->sax->elementDecl != NULL)) {
6756 if (content != NULL)
6757 content->parent = NULL;
6758 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6759 content);
6760 if ((content != NULL) && (content->parent == NULL)) {
6761 /*
6762 * this is a trick: if xmlAddElementDecl is called,
6763 * instead of copying the full tree it is plugged directly
6764 * if called from the parser. Avoid duplicating the
6765 * interfaces or change the API/ABI
6766 */
6767 xmlFreeDocElementContent(ctxt->myDoc, content);
6768 }
6769 } else if (content != NULL) {
6770 xmlFreeDocElementContent(ctxt->myDoc, content);
6771 }
6772 }
6773 }
6774 return(ret);
6775 }
6776
6777 /**
6778 * xmlParseConditionalSections
6779 * @ctxt: an XML parser context
6780 *
6781 * [61] conditionalSect ::= includeSect | ignoreSect
6782 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6783 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6784 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6785 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6786 */
6787
6788 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6789 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6790 int id = ctxt->input->id;
6791
6792 SKIP(3);
6793 SKIP_BLANKS;
6794 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6795 SKIP(7);
6796 SKIP_BLANKS;
6797 if (RAW != '[') {
6798 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6799 xmlHaltParser(ctxt);
6800 return;
6801 } else {
6802 if (ctxt->input->id != id) {
6803 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804 "All markup of the conditional section is not in the same entity\n",
6805 NULL, NULL);
6806 }
6807 NEXT;
6808 }
6809 if (xmlParserDebugEntities) {
6810 if ((ctxt->input != NULL) && (ctxt->input->filename))
6811 xmlGenericError(xmlGenericErrorContext,
6812 "%s(%d): ", ctxt->input->filename,
6813 ctxt->input->line);
6814 xmlGenericError(xmlGenericErrorContext,
6815 "Entering INCLUDE Conditional Section\n");
6816 }
6817
6818 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6819 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6820 const xmlChar *check = CUR_PTR;
6821 unsigned int cons = ctxt->input->consumed;
6822
6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 xmlParseConditionalSections(ctxt);
6825 } else if (IS_BLANK_CH(CUR)) {
6826 NEXT;
6827 } else if (RAW == '%') {
6828 xmlParsePEReference(ctxt);
6829 } else
6830 xmlParseMarkupDecl(ctxt);
6831
6832 /*
6833 * Pop-up of finished entities.
6834 */
6835 while ((RAW == 0) && (ctxt->inputNr > 1))
6836 xmlPopInput(ctxt);
6837
6838 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6839 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6840 break;
6841 }
6842 }
6843 if (xmlParserDebugEntities) {
6844 if ((ctxt->input != NULL) && (ctxt->input->filename))
6845 xmlGenericError(xmlGenericErrorContext,
6846 "%s(%d): ", ctxt->input->filename,
6847 ctxt->input->line);
6848 xmlGenericError(xmlGenericErrorContext,
6849 "Leaving INCLUDE Conditional Section\n");
6850 }
6851
6852 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6853 int state;
6854 xmlParserInputState instate;
6855 int depth = 0;
6856
6857 SKIP(6);
6858 SKIP_BLANKS;
6859 if (RAW != '[') {
6860 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6861 xmlHaltParser(ctxt);
6862 return;
6863 } else {
6864 if (ctxt->input->id != id) {
6865 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6866 "All markup of the conditional section is not in the same entity\n",
6867 NULL, NULL);
6868 }
6869 NEXT;
6870 }
6871 if (xmlParserDebugEntities) {
6872 if ((ctxt->input != NULL) && (ctxt->input->filename))
6873 xmlGenericError(xmlGenericErrorContext,
6874 "%s(%d): ", ctxt->input->filename,
6875 ctxt->input->line);
6876 xmlGenericError(xmlGenericErrorContext,
6877 "Entering IGNORE Conditional Section\n");
6878 }
6879
6880 /*
6881 * Parse up to the end of the conditional section
6882 * But disable SAX event generating DTD building in the meantime
6883 */
6884 state = ctxt->disableSAX;
6885 instate = ctxt->instate;
6886 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6887 ctxt->instate = XML_PARSER_IGNORE;
6888
6889 while (((depth >= 0) && (RAW != 0)) &&
6890 (ctxt->instate != XML_PARSER_EOF)) {
6891 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6892 depth++;
6893 SKIP(3);
6894 continue;
6895 }
6896 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6897 if (--depth >= 0) SKIP(3);
6898 continue;
6899 }
6900 NEXT;
6901 continue;
6902 }
6903
6904 ctxt->disableSAX = state;
6905 ctxt->instate = instate;
6906
6907 if (xmlParserDebugEntities) {
6908 if ((ctxt->input != NULL) && (ctxt->input->filename))
6909 xmlGenericError(xmlGenericErrorContext,
6910 "%s(%d): ", ctxt->input->filename,
6911 ctxt->input->line);
6912 xmlGenericError(xmlGenericErrorContext,
6913 "Leaving IGNORE Conditional Section\n");
6914 }
6915
6916 } else {
6917 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6918 xmlHaltParser(ctxt);
6919 return;
6920 }
6921
6922 if (RAW == 0)
6923 SHRINK;
6924
6925 if (RAW == 0) {
6926 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6927 } else {
6928 if (ctxt->input->id != id) {
6929 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6930 "All markup of the conditional section is not in the same entity\n",
6931 NULL, NULL);
6932 }
6933 if ((ctxt-> instate != XML_PARSER_EOF) &&
6934 ((ctxt->input->cur + 3) <= ctxt->input->end))
6935 SKIP(3);
6936 }
6937 }
6938
6939 /**
6940 * xmlParseMarkupDecl:
6941 * @ctxt: an XML parser context
6942 *
6943 * parse Markup declarations
6944 *
6945 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6946 * NotationDecl | PI | Comment
6947 *
6948 * [ VC: Proper Declaration/PE Nesting ]
6949 * Parameter-entity replacement text must be properly nested with
6950 * markup declarations. That is to say, if either the first character
6951 * or the last character of a markup declaration (markupdecl above) is
6952 * contained in the replacement text for a parameter-entity reference,
6953 * both must be contained in the same replacement text.
6954 *
6955 * [ WFC: PEs in Internal Subset ]
6956 * In the internal DTD subset, parameter-entity references can occur
6957 * only where markup declarations can occur, not within markup declarations.
6958 * (This does not apply to references that occur in external parameter
6959 * entities or to the external subset.)
6960 */
6961 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6962 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6963 GROW;
6964 if (CUR == '<') {
6965 if (NXT(1) == '!') {
6966 switch (NXT(2)) {
6967 case 'E':
6968 if (NXT(3) == 'L')
6969 xmlParseElementDecl(ctxt);
6970 else if (NXT(3) == 'N')
6971 xmlParseEntityDecl(ctxt);
6972 break;
6973 case 'A':
6974 xmlParseAttributeListDecl(ctxt);
6975 break;
6976 case 'N':
6977 xmlParseNotationDecl(ctxt);
6978 break;
6979 case '-':
6980 xmlParseComment(ctxt);
6981 break;
6982 default:
6983 /* there is an error but it will be detected later */
6984 break;
6985 }
6986 } else if (NXT(1) == '?') {
6987 xmlParsePI(ctxt);
6988 }
6989 }
6990
6991 /*
6992 * detect requirement to exit there and act accordingly
6993 * and avoid having instate overriden later on
6994 */
6995 if (ctxt->instate == XML_PARSER_EOF)
6996 return;
6997
6998 /*
6999 * This is only for internal subset. On external entities,
7000 * the replacement is done before parsing stage
7001 */
7002 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7003 xmlParsePEReference(ctxt);
7004
7005 /*
7006 * Conditional sections are allowed from entities included
7007 * by PE References in the internal subset.
7008 */
7009 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7010 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7011 xmlParseConditionalSections(ctxt);
7012 }
7013 }
7014
7015 ctxt->instate = XML_PARSER_DTD;
7016 }
7017
7018 /**
7019 * xmlParseTextDecl:
7020 * @ctxt: an XML parser context
7021 *
7022 * parse an XML declaration header for external entities
7023 *
7024 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7025 */
7026
7027 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7028 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7029 xmlChar *version;
7030 const xmlChar *encoding;
7031
7032 /*
7033 * We know that '<?xml' is here.
7034 */
7035 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7036 SKIP(5);
7037 } else {
7038 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7039 return;
7040 }
7041
7042 if (!IS_BLANK_CH(CUR)) {
7043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7044 "Space needed after '<?xml'\n");
7045 }
7046 SKIP_BLANKS;
7047
7048 /*
7049 * We may have the VersionInfo here.
7050 */
7051 version = xmlParseVersionInfo(ctxt);
7052 if (version == NULL)
7053 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7054 else {
7055 if (!IS_BLANK_CH(CUR)) {
7056 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7057 "Space needed here\n");
7058 }
7059 }
7060 ctxt->input->version = version;
7061
7062 /*
7063 * We must have the encoding declaration
7064 */
7065 encoding = xmlParseEncodingDecl(ctxt);
7066 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7067 /*
7068 * The XML REC instructs us to stop parsing right here
7069 */
7070 return;
7071 }
7072 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7073 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7074 "Missing encoding in text declaration\n");
7075 }
7076
7077 SKIP_BLANKS;
7078 if ((RAW == '?') && (NXT(1) == '>')) {
7079 SKIP(2);
7080 } else if (RAW == '>') {
7081 /* Deprecated old WD ... */
7082 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7083 NEXT;
7084 } else {
7085 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7086 MOVETO_ENDTAG(CUR_PTR);
7087 NEXT;
7088 }
7089 }
7090
7091 /**
7092 * xmlParseExternalSubset:
7093 * @ctxt: an XML parser context
7094 * @ExternalID: the external identifier
7095 * @SystemID: the system identifier (or URL)
7096 *
7097 * parse Markup declarations from an external subset
7098 *
7099 * [30] extSubset ::= textDecl? extSubsetDecl
7100 *
7101 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7102 */
7103 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7104 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7105 const xmlChar *SystemID) {
7106 xmlDetectSAX2(ctxt);
7107 GROW;
7108
7109 if ((ctxt->encoding == NULL) &&
7110 (ctxt->input->end - ctxt->input->cur >= 4)) {
7111 xmlChar start[4];
7112 xmlCharEncoding enc;
7113
7114 start[0] = RAW;
7115 start[1] = NXT(1);
7116 start[2] = NXT(2);
7117 start[3] = NXT(3);
7118 enc = xmlDetectCharEncoding(start, 4);
7119 if (enc != XML_CHAR_ENCODING_NONE)
7120 xmlSwitchEncoding(ctxt, enc);
7121 }
7122
7123 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7124 xmlParseTextDecl(ctxt);
7125 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7126 /*
7127 * The XML REC instructs us to stop parsing right here
7128 */
7129 xmlHaltParser(ctxt);
7130 return;
7131 }
7132 }
7133 if (ctxt->myDoc == NULL) {
7134 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7135 if (ctxt->myDoc == NULL) {
7136 xmlErrMemory(ctxt, "New Doc failed");
7137 return;
7138 }
7139 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7140 }
7141 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7142 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7143
7144 ctxt->instate = XML_PARSER_DTD;
7145 ctxt->external = 1;
7146 while (((RAW == '<') && (NXT(1) == '?')) ||
7147 ((RAW == '<') && (NXT(1) == '!')) ||
7148 (RAW == '%') || IS_BLANK_CH(CUR)) {
7149 const xmlChar *check = CUR_PTR;
7150 unsigned int cons = ctxt->input->consumed;
7151
7152 GROW;
7153 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7154 xmlParseConditionalSections(ctxt);
7155 } else if (IS_BLANK_CH(CUR)) {
7156 NEXT;
7157 } else if (RAW == '%') {
7158 xmlParsePEReference(ctxt);
7159 } else
7160 xmlParseMarkupDecl(ctxt);
7161
7162 /*
7163 * Pop-up of finished entities.
7164 */
7165 while ((RAW == 0) && (ctxt->inputNr > 1))
7166 xmlPopInput(ctxt);
7167
7168 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7169 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7170 break;
7171 }
7172 }
7173
7174 if (RAW != 0) {
7175 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7176 }
7177
7178 }
7179
7180 /**
7181 * xmlParseReference:
7182 * @ctxt: an XML parser context
7183 *
7184 * parse and handle entity references in content, depending on the SAX
7185 * interface, this may end-up in a call to character() if this is a
7186 * CharRef, a predefined entity, if there is no reference() callback.
7187 * or if the parser was asked to switch to that mode.
7188 *
7189 * [67] Reference ::= EntityRef | CharRef
7190 */
7191 void
xmlParseReference(xmlParserCtxtPtr ctxt)7192 xmlParseReference(xmlParserCtxtPtr ctxt) {
7193 xmlEntityPtr ent;
7194 xmlChar *val;
7195 int was_checked;
7196 xmlNodePtr list = NULL;
7197 xmlParserErrors ret = XML_ERR_OK;
7198
7199
7200 if (RAW != '&')
7201 return;
7202
7203 /*
7204 * Simple case of a CharRef
7205 */
7206 if (NXT(1) == '#') {
7207 int i = 0;
7208 xmlChar out[10];
7209 int hex = NXT(2);
7210 int value = xmlParseCharRef(ctxt);
7211
7212 if (value == 0)
7213 return;
7214 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7215 /*
7216 * So we are using non-UTF-8 buffers
7217 * Check that the char fit on 8bits, if not
7218 * generate a CharRef.
7219 */
7220 if (value <= 0xFF) {
7221 out[0] = value;
7222 out[1] = 0;
7223 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7224 (!ctxt->disableSAX))
7225 ctxt->sax->characters(ctxt->userData, out, 1);
7226 } else {
7227 if ((hex == 'x') || (hex == 'X'))
7228 snprintf((char *)out, sizeof(out), "#x%X", value);
7229 else
7230 snprintf((char *)out, sizeof(out), "#%d", value);
7231 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7232 (!ctxt->disableSAX))
7233 ctxt->sax->reference(ctxt->userData, out);
7234 }
7235 } else {
7236 /*
7237 * Just encode the value in UTF-8
7238 */
7239 COPY_BUF(0 ,out, i, value);
7240 out[i] = 0;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7242 (!ctxt->disableSAX))
7243 ctxt->sax->characters(ctxt->userData, out, i);
7244 }
7245 return;
7246 }
7247
7248 /*
7249 * We are seeing an entity reference
7250 */
7251 ent = xmlParseEntityRef(ctxt);
7252 if (ent == NULL) return;
7253 if (!ctxt->wellFormed)
7254 return;
7255 was_checked = ent->checked;
7256
7257 /* special case of predefined entities */
7258 if ((ent->name == NULL) ||
7259 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7260 val = ent->content;
7261 if (val == NULL) return;
7262 /*
7263 * inline the entity.
7264 */
7265 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7266 (!ctxt->disableSAX))
7267 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7268 return;
7269 }
7270
7271 /*
7272 * The first reference to the entity trigger a parsing phase
7273 * where the ent->children is filled with the result from
7274 * the parsing.
7275 * Note: external parsed entities will not be loaded, it is not
7276 * required for a non-validating parser, unless the parsing option
7277 * of validating, or substituting entities were given. Doing so is
7278 * far more secure as the parser will only process data coming from
7279 * the document entity by default.
7280 */
7281 if (((ent->checked == 0) ||
7282 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7283 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7284 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7285 unsigned long oldnbent = ctxt->nbentities;
7286
7287 /*
7288 * This is a bit hackish but this seems the best
7289 * way to make sure both SAX and DOM entity support
7290 * behaves okay.
7291 */
7292 void *user_data;
7293 if (ctxt->userData == ctxt)
7294 user_data = NULL;
7295 else
7296 user_data = ctxt->userData;
7297
7298 /*
7299 * Check that this entity is well formed
7300 * 4.3.2: An internal general parsed entity is well-formed
7301 * if its replacement text matches the production labeled
7302 * content.
7303 */
7304 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7305 ctxt->depth++;
7306 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7307 user_data, &list);
7308 ctxt->depth--;
7309
7310 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7311 ctxt->depth++;
7312 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7313 user_data, ctxt->depth, ent->URI,
7314 ent->ExternalID, &list);
7315 ctxt->depth--;
7316 } else {
7317 ret = XML_ERR_ENTITY_PE_INTERNAL;
7318 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7319 "invalid entity type found\n", NULL);
7320 }
7321
7322 /*
7323 * Store the number of entities needing parsing for this entity
7324 * content and do checkings
7325 */
7326 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7327 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7328 ent->checked |= 1;
7329 if (ret == XML_ERR_ENTITY_LOOP) {
7330 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7331 xmlFreeNodeList(list);
7332 return;
7333 }
7334 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7335 xmlFreeNodeList(list);
7336 return;
7337 }
7338
7339 if ((ret == XML_ERR_OK) && (list != NULL)) {
7340 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7341 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7342 (ent->children == NULL)) {
7343 ent->children = list;
7344 if (ctxt->replaceEntities) {
7345 /*
7346 * Prune it directly in the generated document
7347 * except for single text nodes.
7348 */
7349 if (((list->type == XML_TEXT_NODE) &&
7350 (list->next == NULL)) ||
7351 (ctxt->parseMode == XML_PARSE_READER)) {
7352 list->parent = (xmlNodePtr) ent;
7353 list = NULL;
7354 ent->owner = 1;
7355 } else {
7356 ent->owner = 0;
7357 while (list != NULL) {
7358 list->parent = (xmlNodePtr) ctxt->node;
7359 list->doc = ctxt->myDoc;
7360 if (list->next == NULL)
7361 ent->last = list;
7362 list = list->next;
7363 }
7364 list = ent->children;
7365 #ifdef LIBXML_LEGACY_ENABLED
7366 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7367 xmlAddEntityReference(ent, list, NULL);
7368 #endif /* LIBXML_LEGACY_ENABLED */
7369 }
7370 } else {
7371 ent->owner = 1;
7372 while (list != NULL) {
7373 list->parent = (xmlNodePtr) ent;
7374 xmlSetTreeDoc(list, ent->doc);
7375 if (list->next == NULL)
7376 ent->last = list;
7377 list = list->next;
7378 }
7379 }
7380 } else {
7381 xmlFreeNodeList(list);
7382 list = NULL;
7383 }
7384 } else if ((ret != XML_ERR_OK) &&
7385 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7386 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7387 "Entity '%s' failed to parse\n", ent->name);
7388 xmlParserEntityCheck(ctxt, 0, ent, 0);
7389 } else if (list != NULL) {
7390 xmlFreeNodeList(list);
7391 list = NULL;
7392 }
7393 if (ent->checked == 0)
7394 ent->checked = 2;
7395 } else if (ent->checked != 1) {
7396 ctxt->nbentities += ent->checked / 2;
7397 }
7398
7399 /*
7400 * Now that the entity content has been gathered
7401 * provide it to the application, this can take different forms based
7402 * on the parsing modes.
7403 */
7404 if (ent->children == NULL) {
7405 /*
7406 * Probably running in SAX mode and the callbacks don't
7407 * build the entity content. So unless we already went
7408 * though parsing for first checking go though the entity
7409 * content to generate callbacks associated to the entity
7410 */
7411 if (was_checked != 0) {
7412 void *user_data;
7413 /*
7414 * This is a bit hackish but this seems the best
7415 * way to make sure both SAX and DOM entity support
7416 * behaves okay.
7417 */
7418 if (ctxt->userData == ctxt)
7419 user_data = NULL;
7420 else
7421 user_data = ctxt->userData;
7422
7423 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7424 ctxt->depth++;
7425 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7426 ent->content, user_data, NULL);
7427 ctxt->depth--;
7428 } else if (ent->etype ==
7429 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7430 ctxt->depth++;
7431 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7432 ctxt->sax, user_data, ctxt->depth,
7433 ent->URI, ent->ExternalID, NULL);
7434 ctxt->depth--;
7435 } else {
7436 ret = XML_ERR_ENTITY_PE_INTERNAL;
7437 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7438 "invalid entity type found\n", NULL);
7439 }
7440 if (ret == XML_ERR_ENTITY_LOOP) {
7441 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7442 return;
7443 }
7444 }
7445 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7446 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7447 /*
7448 * Entity reference callback comes second, it's somewhat
7449 * superfluous but a compatibility to historical behaviour
7450 */
7451 ctxt->sax->reference(ctxt->userData, ent->name);
7452 }
7453 return;
7454 }
7455
7456 /*
7457 * If we didn't get any children for the entity being built
7458 */
7459 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7460 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7461 /*
7462 * Create a node.
7463 */
7464 ctxt->sax->reference(ctxt->userData, ent->name);
7465 return;
7466 }
7467
7468 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7469 /*
7470 * There is a problem on the handling of _private for entities
7471 * (bug 155816): Should we copy the content of the field from
7472 * the entity (possibly overwriting some value set by the user
7473 * when a copy is created), should we leave it alone, or should
7474 * we try to take care of different situations? The problem
7475 * is exacerbated by the usage of this field by the xmlReader.
7476 * To fix this bug, we look at _private on the created node
7477 * and, if it's NULL, we copy in whatever was in the entity.
7478 * If it's not NULL we leave it alone. This is somewhat of a
7479 * hack - maybe we should have further tests to determine
7480 * what to do.
7481 */
7482 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7483 /*
7484 * Seems we are generating the DOM content, do
7485 * a simple tree copy for all references except the first
7486 * In the first occurrence list contains the replacement.
7487 */
7488 if (((list == NULL) && (ent->owner == 0)) ||
7489 (ctxt->parseMode == XML_PARSE_READER)) {
7490 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7491
7492 /*
7493 * We are copying here, make sure there is no abuse
7494 */
7495 ctxt->sizeentcopy += ent->length + 5;
7496 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497 return;
7498
7499 /*
7500 * when operating on a reader, the entities definitions
7501 * are always owning the entities subtree.
7502 if (ctxt->parseMode == XML_PARSE_READER)
7503 ent->owner = 1;
7504 */
7505
7506 cur = ent->children;
7507 while (cur != NULL) {
7508 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7509 if (nw != NULL) {
7510 if (nw->_private == NULL)
7511 nw->_private = cur->_private;
7512 if (firstChild == NULL){
7513 firstChild = nw;
7514 }
7515 nw = xmlAddChild(ctxt->node, nw);
7516 }
7517 if (cur == ent->last) {
7518 /*
7519 * needed to detect some strange empty
7520 * node cases in the reader tests
7521 */
7522 if ((ctxt->parseMode == XML_PARSE_READER) &&
7523 (nw != NULL) &&
7524 (nw->type == XML_ELEMENT_NODE) &&
7525 (nw->children == NULL))
7526 nw->extra = 1;
7527
7528 break;
7529 }
7530 cur = cur->next;
7531 }
7532 #ifdef LIBXML_LEGACY_ENABLED
7533 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7534 xmlAddEntityReference(ent, firstChild, nw);
7535 #endif /* LIBXML_LEGACY_ENABLED */
7536 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7537 xmlNodePtr nw = NULL, cur, next, last,
7538 firstChild = NULL;
7539
7540 /*
7541 * We are copying here, make sure there is no abuse
7542 */
7543 ctxt->sizeentcopy += ent->length + 5;
7544 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7545 return;
7546
7547 /*
7548 * Copy the entity child list and make it the new
7549 * entity child list. The goal is to make sure any
7550 * ID or REF referenced will be the one from the
7551 * document content and not the entity copy.
7552 */
7553 cur = ent->children;
7554 ent->children = NULL;
7555 last = ent->last;
7556 ent->last = NULL;
7557 while (cur != NULL) {
7558 next = cur->next;
7559 cur->next = NULL;
7560 cur->parent = NULL;
7561 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7562 if (nw != NULL) {
7563 if (nw->_private == NULL)
7564 nw->_private = cur->_private;
7565 if (firstChild == NULL){
7566 firstChild = cur;
7567 }
7568 xmlAddChild((xmlNodePtr) ent, nw);
7569 xmlAddChild(ctxt->node, cur);
7570 }
7571 if (cur == last)
7572 break;
7573 cur = next;
7574 }
7575 if (ent->owner == 0)
7576 ent->owner = 1;
7577 #ifdef LIBXML_LEGACY_ENABLED
7578 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7579 xmlAddEntityReference(ent, firstChild, nw);
7580 #endif /* LIBXML_LEGACY_ENABLED */
7581 } else {
7582 const xmlChar *nbktext;
7583
7584 /*
7585 * the name change is to avoid coalescing of the
7586 * node with a possible previous text one which
7587 * would make ent->children a dangling pointer
7588 */
7589 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7590 -1);
7591 if (ent->children->type == XML_TEXT_NODE)
7592 ent->children->name = nbktext;
7593 if ((ent->last != ent->children) &&
7594 (ent->last->type == XML_TEXT_NODE))
7595 ent->last->name = nbktext;
7596 xmlAddChildList(ctxt->node, ent->children);
7597 }
7598
7599 /*
7600 * This is to avoid a nasty side effect, see
7601 * characters() in SAX.c
7602 */
7603 ctxt->nodemem = 0;
7604 ctxt->nodelen = 0;
7605 return;
7606 }
7607 }
7608 }
7609
7610 /**
7611 * xmlParseEntityRef:
7612 * @ctxt: an XML parser context
7613 *
7614 * parse ENTITY references declarations
7615 *
7616 * [68] EntityRef ::= '&' Name ';'
7617 *
7618 * [ WFC: Entity Declared ]
7619 * In a document without any DTD, a document with only an internal DTD
7620 * subset which contains no parameter entity references, or a document
7621 * with "standalone='yes'", the Name given in the entity reference
7622 * must match that in an entity declaration, except that well-formed
7623 * documents need not declare any of the following entities: amp, lt,
7624 * gt, apos, quot. The declaration of a parameter entity must precede
7625 * any reference to it. Similarly, the declaration of a general entity
7626 * must precede any reference to it which appears in a default value in an
7627 * attribute-list declaration. Note that if entities are declared in the
7628 * external subset or in external parameter entities, a non-validating
7629 * processor is not obligated to read and process their declarations;
7630 * for such documents, the rule that an entity must be declared is a
7631 * well-formedness constraint only if standalone='yes'.
7632 *
7633 * [ WFC: Parsed Entity ]
7634 * An entity reference must not contain the name of an unparsed entity
7635 *
7636 * Returns the xmlEntityPtr if found, or NULL otherwise.
7637 */
7638 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7639 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7640 const xmlChar *name;
7641 xmlEntityPtr ent = NULL;
7642
7643 GROW;
7644 if (ctxt->instate == XML_PARSER_EOF)
7645 return(NULL);
7646
7647 if (RAW != '&')
7648 return(NULL);
7649 NEXT;
7650 name = xmlParseName(ctxt);
7651 if (name == NULL) {
7652 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7653 "xmlParseEntityRef: no name\n");
7654 return(NULL);
7655 }
7656 if (RAW != ';') {
7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7658 return(NULL);
7659 }
7660 NEXT;
7661
7662 /*
7663 * Predefined entities override any extra definition
7664 */
7665 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7666 ent = xmlGetPredefinedEntity(name);
7667 if (ent != NULL)
7668 return(ent);
7669 }
7670
7671 /*
7672 * Increase the number of entity references parsed
7673 */
7674 ctxt->nbentities++;
7675
7676 /*
7677 * Ask first SAX for entity resolution, otherwise try the
7678 * entities which may have stored in the parser context.
7679 */
7680 if (ctxt->sax != NULL) {
7681 if (ctxt->sax->getEntity != NULL)
7682 ent = ctxt->sax->getEntity(ctxt->userData, name);
7683 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7684 (ctxt->options & XML_PARSE_OLDSAX))
7685 ent = xmlGetPredefinedEntity(name);
7686 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7687 (ctxt->userData==ctxt)) {
7688 ent = xmlSAX2GetEntity(ctxt, name);
7689 }
7690 }
7691 if (ctxt->instate == XML_PARSER_EOF)
7692 return(NULL);
7693 /*
7694 * [ WFC: Entity Declared ]
7695 * In a document without any DTD, a document with only an
7696 * internal DTD subset which contains no parameter entity
7697 * references, or a document with "standalone='yes'", the
7698 * Name given in the entity reference must match that in an
7699 * entity declaration, except that well-formed documents
7700 * need not declare any of the following entities: amp, lt,
7701 * gt, apos, quot.
7702 * The declaration of a parameter entity must precede any
7703 * reference to it.
7704 * Similarly, the declaration of a general entity must
7705 * precede any reference to it which appears in a default
7706 * value in an attribute-list declaration. Note that if
7707 * entities are declared in the external subset or in
7708 * external parameter entities, a non-validating processor
7709 * is not obligated to read and process their declarations;
7710 * for such documents, the rule that an entity must be
7711 * declared is a well-formedness constraint only if
7712 * standalone='yes'.
7713 */
7714 if (ent == NULL) {
7715 if ((ctxt->standalone == 1) ||
7716 ((ctxt->hasExternalSubset == 0) &&
7717 (ctxt->hasPErefs == 0))) {
7718 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7719 "Entity '%s' not defined\n", name);
7720 } else {
7721 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7722 "Entity '%s' not defined\n", name);
7723 if ((ctxt->inSubset == 0) &&
7724 (ctxt->sax != NULL) &&
7725 (ctxt->sax->reference != NULL)) {
7726 ctxt->sax->reference(ctxt->userData, name);
7727 }
7728 }
7729 xmlParserEntityCheck(ctxt, 0, ent, 0);
7730 ctxt->valid = 0;
7731 }
7732
7733 /*
7734 * [ WFC: Parsed Entity ]
7735 * An entity reference must not contain the name of an
7736 * unparsed entity
7737 */
7738 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7739 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7740 "Entity reference to unparsed entity %s\n", name);
7741 }
7742
7743 /*
7744 * [ WFC: No External Entity References ]
7745 * Attribute values cannot contain direct or indirect
7746 * entity references to external entities.
7747 */
7748 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7749 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7750 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7751 "Attribute references external entity '%s'\n", name);
7752 }
7753 /*
7754 * [ WFC: No < in Attribute Values ]
7755 * The replacement text of any entity referred to directly or
7756 * indirectly in an attribute value (other than "<") must
7757 * not contain a <.
7758 */
7759 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7760 (ent != NULL) &&
7761 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7762 if (((ent->checked & 1) || (ent->checked == 0)) &&
7763 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7764 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7765 "'<' in entity '%s' is not allowed in attributes values\n", name);
7766 }
7767 }
7768
7769 /*
7770 * Internal check, no parameter entities here ...
7771 */
7772 else {
7773 switch (ent->etype) {
7774 case XML_INTERNAL_PARAMETER_ENTITY:
7775 case XML_EXTERNAL_PARAMETER_ENTITY:
7776 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7777 "Attempt to reference the parameter entity '%s'\n",
7778 name);
7779 break;
7780 default:
7781 break;
7782 }
7783 }
7784
7785 /*
7786 * [ WFC: No Recursion ]
7787 * A parsed entity must not contain a recursive reference
7788 * to itself, either directly or indirectly.
7789 * Done somewhere else
7790 */
7791 return(ent);
7792 }
7793
7794 /**
7795 * xmlParseStringEntityRef:
7796 * @ctxt: an XML parser context
7797 * @str: a pointer to an index in the string
7798 *
7799 * parse ENTITY references declarations, but this version parses it from
7800 * a string value.
7801 *
7802 * [68] EntityRef ::= '&' Name ';'
7803 *
7804 * [ WFC: Entity Declared ]
7805 * In a document without any DTD, a document with only an internal DTD
7806 * subset which contains no parameter entity references, or a document
7807 * with "standalone='yes'", the Name given in the entity reference
7808 * must match that in an entity declaration, except that well-formed
7809 * documents need not declare any of the following entities: amp, lt,
7810 * gt, apos, quot. The declaration of a parameter entity must precede
7811 * any reference to it. Similarly, the declaration of a general entity
7812 * must precede any reference to it which appears in a default value in an
7813 * attribute-list declaration. Note that if entities are declared in the
7814 * external subset or in external parameter entities, a non-validating
7815 * processor is not obligated to read and process their declarations;
7816 * for such documents, the rule that an entity must be declared is a
7817 * well-formedness constraint only if standalone='yes'.
7818 *
7819 * [ WFC: Parsed Entity ]
7820 * An entity reference must not contain the name of an unparsed entity
7821 *
7822 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7823 * is updated to the current location in the string.
7824 */
7825 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7826 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7827 xmlChar *name;
7828 const xmlChar *ptr;
7829 xmlChar cur;
7830 xmlEntityPtr ent = NULL;
7831
7832 if ((str == NULL) || (*str == NULL))
7833 return(NULL);
7834 ptr = *str;
7835 cur = *ptr;
7836 if (cur != '&')
7837 return(NULL);
7838
7839 ptr++;
7840 name = xmlParseStringName(ctxt, &ptr);
7841 if (name == NULL) {
7842 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7843 "xmlParseStringEntityRef: no name\n");
7844 *str = ptr;
7845 return(NULL);
7846 }
7847 if (*ptr != ';') {
7848 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7849 xmlFree(name);
7850 *str = ptr;
7851 return(NULL);
7852 }
7853 ptr++;
7854
7855
7856 /*
7857 * Predefined entities override any extra definition
7858 */
7859 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7860 ent = xmlGetPredefinedEntity(name);
7861 if (ent != NULL) {
7862 xmlFree(name);
7863 *str = ptr;
7864 return(ent);
7865 }
7866 }
7867
7868 /*
7869 * Increate the number of entity references parsed
7870 */
7871 ctxt->nbentities++;
7872
7873 /*
7874 * Ask first SAX for entity resolution, otherwise try the
7875 * entities which may have stored in the parser context.
7876 */
7877 if (ctxt->sax != NULL) {
7878 if (ctxt->sax->getEntity != NULL)
7879 ent = ctxt->sax->getEntity(ctxt->userData, name);
7880 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7881 ent = xmlGetPredefinedEntity(name);
7882 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7883 ent = xmlSAX2GetEntity(ctxt, name);
7884 }
7885 }
7886 if (ctxt->instate == XML_PARSER_EOF) {
7887 xmlFree(name);
7888 return(NULL);
7889 }
7890
7891 /*
7892 * [ WFC: Entity Declared ]
7893 * In a document without any DTD, a document with only an
7894 * internal DTD subset which contains no parameter entity
7895 * references, or a document with "standalone='yes'", the
7896 * Name given in the entity reference must match that in an
7897 * entity declaration, except that well-formed documents
7898 * need not declare any of the following entities: amp, lt,
7899 * gt, apos, quot.
7900 * The declaration of a parameter entity must precede any
7901 * reference to it.
7902 * Similarly, the declaration of a general entity must
7903 * precede any reference to it which appears in a default
7904 * value in an attribute-list declaration. Note that if
7905 * entities are declared in the external subset or in
7906 * external parameter entities, a non-validating processor
7907 * is not obligated to read and process their declarations;
7908 * for such documents, the rule that an entity must be
7909 * declared is a well-formedness constraint only if
7910 * standalone='yes'.
7911 */
7912 if (ent == NULL) {
7913 if ((ctxt->standalone == 1) ||
7914 ((ctxt->hasExternalSubset == 0) &&
7915 (ctxt->hasPErefs == 0))) {
7916 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7917 "Entity '%s' not defined\n", name);
7918 } else {
7919 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920 "Entity '%s' not defined\n",
7921 name);
7922 }
7923 xmlParserEntityCheck(ctxt, 0, ent, 0);
7924 /* TODO ? check regressions ctxt->valid = 0; */
7925 }
7926
7927 /*
7928 * [ WFC: Parsed Entity ]
7929 * An entity reference must not contain the name of an
7930 * unparsed entity
7931 */
7932 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7933 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7934 "Entity reference to unparsed entity %s\n", name);
7935 }
7936
7937 /*
7938 * [ WFC: No External Entity References ]
7939 * Attribute values cannot contain direct or indirect
7940 * entity references to external entities.
7941 */
7942 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7943 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7944 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7945 "Attribute references external entity '%s'\n", name);
7946 }
7947 /*
7948 * [ WFC: No < in Attribute Values ]
7949 * The replacement text of any entity referred to directly or
7950 * indirectly in an attribute value (other than "<") must
7951 * not contain a <.
7952 */
7953 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7954 (ent != NULL) && (ent->content != NULL) &&
7955 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7956 (xmlStrchr(ent->content, '<'))) {
7957 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7958 "'<' in entity '%s' is not allowed in attributes values\n",
7959 name);
7960 }
7961
7962 /*
7963 * Internal check, no parameter entities here ...
7964 */
7965 else {
7966 switch (ent->etype) {
7967 case XML_INTERNAL_PARAMETER_ENTITY:
7968 case XML_EXTERNAL_PARAMETER_ENTITY:
7969 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7970 "Attempt to reference the parameter entity '%s'\n",
7971 name);
7972 break;
7973 default:
7974 break;
7975 }
7976 }
7977
7978 /*
7979 * [ WFC: No Recursion ]
7980 * A parsed entity must not contain a recursive reference
7981 * to itself, either directly or indirectly.
7982 * Done somewhere else
7983 */
7984
7985 xmlFree(name);
7986 *str = ptr;
7987 return(ent);
7988 }
7989
7990 /**
7991 * xmlParsePEReference:
7992 * @ctxt: an XML parser context
7993 *
7994 * parse PEReference declarations
7995 * The entity content is handled directly by pushing it's content as
7996 * a new input stream.
7997 *
7998 * [69] PEReference ::= '%' Name ';'
7999 *
8000 * [ WFC: No Recursion ]
8001 * A parsed entity must not contain a recursive
8002 * reference to itself, either directly or indirectly.
8003 *
8004 * [ WFC: Entity Declared ]
8005 * In a document without any DTD, a document with only an internal DTD
8006 * subset which contains no parameter entity references, or a document
8007 * with "standalone='yes'", ... ... The declaration of a parameter
8008 * entity must precede any reference to it...
8009 *
8010 * [ VC: Entity Declared ]
8011 * In a document with an external subset or external parameter entities
8012 * with "standalone='no'", ... ... The declaration of a parameter entity
8013 * must precede any reference to it...
8014 *
8015 * [ WFC: In DTD ]
8016 * Parameter-entity references may only appear in the DTD.
8017 * NOTE: misleading but this is handled.
8018 */
8019 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)8020 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8021 {
8022 const xmlChar *name;
8023 xmlEntityPtr entity = NULL;
8024 xmlParserInputPtr input;
8025
8026 if (RAW != '%')
8027 return;
8028 NEXT;
8029 name = xmlParseName(ctxt);
8030 if (name == NULL) {
8031 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8032 "xmlParsePEReference: no name\n");
8033 return;
8034 }
8035 if (RAW != ';') {
8036 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8037 return;
8038 }
8039
8040 NEXT;
8041
8042 /*
8043 * Increate the number of entity references parsed
8044 */
8045 ctxt->nbentities++;
8046
8047 /*
8048 * Request the entity from SAX
8049 */
8050 if ((ctxt->sax != NULL) &&
8051 (ctxt->sax->getParameterEntity != NULL))
8052 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8053 if (ctxt->instate == XML_PARSER_EOF)
8054 return;
8055 if (entity == NULL) {
8056 /*
8057 * [ WFC: Entity Declared ]
8058 * In a document without any DTD, a document with only an
8059 * internal DTD subset which contains no parameter entity
8060 * references, or a document with "standalone='yes'", ...
8061 * ... The declaration of a parameter entity must precede
8062 * any reference to it...
8063 */
8064 if ((ctxt->standalone == 1) ||
8065 ((ctxt->hasExternalSubset == 0) &&
8066 (ctxt->hasPErefs == 0))) {
8067 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8068 "PEReference: %%%s; not found\n",
8069 name);
8070 } else {
8071 /*
8072 * [ VC: Entity Declared ]
8073 * In a document with an external subset or external
8074 * parameter entities with "standalone='no'", ...
8075 * ... The declaration of a parameter entity must
8076 * precede any reference to it...
8077 */
8078 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8079 "PEReference: %%%s; not found\n",
8080 name, NULL);
8081 ctxt->valid = 0;
8082 }
8083 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8084 } else {
8085 /*
8086 * Internal checking in case the entity quest barfed
8087 */
8088 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8089 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8090 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8091 "Internal: %%%s; is not a parameter entity\n",
8092 name, NULL);
8093 } else if (ctxt->input->free != deallocblankswrapper) {
8094 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8095 if (xmlPushInput(ctxt, input) < 0)
8096 return;
8097 } else {
8098 /*
8099 * TODO !!!
8100 * handle the extra spaces added before and after
8101 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8102 */
8103 input = xmlNewEntityInputStream(ctxt, entity);
8104 if (xmlPushInput(ctxt, input) < 0)
8105 return;
8106 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8107 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8108 (IS_BLANK_CH(NXT(5)))) {
8109 xmlParseTextDecl(ctxt);
8110 if (ctxt->errNo ==
8111 XML_ERR_UNSUPPORTED_ENCODING) {
8112 /*
8113 * The XML REC instructs us to stop parsing
8114 * right here
8115 */
8116 xmlHaltParser(ctxt);
8117 return;
8118 }
8119 }
8120 }
8121 }
8122 ctxt->hasPErefs = 1;
8123 }
8124
8125 /**
8126 * xmlLoadEntityContent:
8127 * @ctxt: an XML parser context
8128 * @entity: an unloaded system entity
8129 *
8130 * Load the original content of the given system entity from the
8131 * ExternalID/SystemID given. This is to be used for Included in Literal
8132 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8133 *
8134 * Returns 0 in case of success and -1 in case of failure
8135 */
8136 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8137 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8138 xmlParserInputPtr input;
8139 xmlBufferPtr buf;
8140 int l, c;
8141 int count = 0;
8142
8143 if ((ctxt == NULL) || (entity == NULL) ||
8144 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8145 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8146 (entity->content != NULL)) {
8147 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8148 "xmlLoadEntityContent parameter error");
8149 return(-1);
8150 }
8151
8152 if (xmlParserDebugEntities)
8153 xmlGenericError(xmlGenericErrorContext,
8154 "Reading %s entity content input\n", entity->name);
8155
8156 buf = xmlBufferCreate();
8157 if (buf == NULL) {
8158 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8159 "xmlLoadEntityContent parameter error");
8160 return(-1);
8161 }
8162
8163 input = xmlNewEntityInputStream(ctxt, entity);
8164 if (input == NULL) {
8165 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8166 "xmlLoadEntityContent input error");
8167 xmlBufferFree(buf);
8168 return(-1);
8169 }
8170
8171 /*
8172 * Push the entity as the current input, read char by char
8173 * saving to the buffer until the end of the entity or an error
8174 */
8175 if (xmlPushInput(ctxt, input) < 0) {
8176 xmlBufferFree(buf);
8177 return(-1);
8178 }
8179
8180 GROW;
8181 c = CUR_CHAR(l);
8182 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8183 (IS_CHAR(c))) {
8184 xmlBufferAdd(buf, ctxt->input->cur, l);
8185 if (count++ > XML_PARSER_CHUNK_SIZE) {
8186 count = 0;
8187 GROW;
8188 if (ctxt->instate == XML_PARSER_EOF) {
8189 xmlBufferFree(buf);
8190 return(-1);
8191 }
8192 }
8193 NEXTL(l);
8194 c = CUR_CHAR(l);
8195 if (c == 0) {
8196 count = 0;
8197 GROW;
8198 if (ctxt->instate == XML_PARSER_EOF) {
8199 xmlBufferFree(buf);
8200 return(-1);
8201 }
8202 c = CUR_CHAR(l);
8203 }
8204 }
8205
8206 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8207 xmlPopInput(ctxt);
8208 } else if (!IS_CHAR(c)) {
8209 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8210 "xmlLoadEntityContent: invalid char value %d\n",
8211 c);
8212 xmlBufferFree(buf);
8213 return(-1);
8214 }
8215 entity->content = buf->content;
8216 buf->content = NULL;
8217 xmlBufferFree(buf);
8218
8219 return(0);
8220 }
8221
8222 /**
8223 * xmlParseStringPEReference:
8224 * @ctxt: an XML parser context
8225 * @str: a pointer to an index in the string
8226 *
8227 * parse PEReference declarations
8228 *
8229 * [69] PEReference ::= '%' Name ';'
8230 *
8231 * [ WFC: No Recursion ]
8232 * A parsed entity must not contain a recursive
8233 * reference to itself, either directly or indirectly.
8234 *
8235 * [ WFC: Entity Declared ]
8236 * In a document without any DTD, a document with only an internal DTD
8237 * subset which contains no parameter entity references, or a document
8238 * with "standalone='yes'", ... ... The declaration of a parameter
8239 * entity must precede any reference to it...
8240 *
8241 * [ VC: Entity Declared ]
8242 * In a document with an external subset or external parameter entities
8243 * with "standalone='no'", ... ... The declaration of a parameter entity
8244 * must precede any reference to it...
8245 *
8246 * [ WFC: In DTD ]
8247 * Parameter-entity references may only appear in the DTD.
8248 * NOTE: misleading but this is handled.
8249 *
8250 * Returns the string of the entity content.
8251 * str is updated to the current value of the index
8252 */
8253 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8254 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8255 const xmlChar *ptr;
8256 xmlChar cur;
8257 xmlChar *name;
8258 xmlEntityPtr entity = NULL;
8259
8260 if ((str == NULL) || (*str == NULL)) return(NULL);
8261 ptr = *str;
8262 cur = *ptr;
8263 if (cur != '%')
8264 return(NULL);
8265 ptr++;
8266 name = xmlParseStringName(ctxt, &ptr);
8267 if (name == NULL) {
8268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8269 "xmlParseStringPEReference: no name\n");
8270 *str = ptr;
8271 return(NULL);
8272 }
8273 cur = *ptr;
8274 if (cur != ';') {
8275 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8276 xmlFree(name);
8277 *str = ptr;
8278 return(NULL);
8279 }
8280 ptr++;
8281
8282 /*
8283 * Increate the number of entity references parsed
8284 */
8285 ctxt->nbentities++;
8286
8287 /*
8288 * Request the entity from SAX
8289 */
8290 if ((ctxt->sax != NULL) &&
8291 (ctxt->sax->getParameterEntity != NULL))
8292 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8293 if (ctxt->instate == XML_PARSER_EOF) {
8294 xmlFree(name);
8295 return(NULL);
8296 }
8297 if (entity == NULL) {
8298 /*
8299 * [ WFC: Entity Declared ]
8300 * In a document without any DTD, a document with only an
8301 * internal DTD subset which contains no parameter entity
8302 * references, or a document with "standalone='yes'", ...
8303 * ... The declaration of a parameter entity must precede
8304 * any reference to it...
8305 */
8306 if ((ctxt->standalone == 1) ||
8307 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8308 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8309 "PEReference: %%%s; not found\n", name);
8310 } else {
8311 /*
8312 * [ VC: Entity Declared ]
8313 * In a document with an external subset or external
8314 * parameter entities with "standalone='no'", ...
8315 * ... The declaration of a parameter entity must
8316 * precede any reference to it...
8317 */
8318 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8319 "PEReference: %%%s; not found\n",
8320 name, NULL);
8321 ctxt->valid = 0;
8322 }
8323 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8324 } else {
8325 /*
8326 * Internal checking in case the entity quest barfed
8327 */
8328 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8329 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8330 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8331 "%%%s; is not a parameter entity\n",
8332 name, NULL);
8333 }
8334 }
8335 ctxt->hasPErefs = 1;
8336 xmlFree(name);
8337 *str = ptr;
8338 return(entity);
8339 }
8340
8341 /**
8342 * xmlParseDocTypeDecl:
8343 * @ctxt: an XML parser context
8344 *
8345 * parse a DOCTYPE declaration
8346 *
8347 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8348 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8349 *
8350 * [ VC: Root Element Type ]
8351 * The Name in the document type declaration must match the element
8352 * type of the root element.
8353 */
8354
8355 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8356 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8357 const xmlChar *name = NULL;
8358 xmlChar *ExternalID = NULL;
8359 xmlChar *URI = NULL;
8360
8361 /*
8362 * We know that '<!DOCTYPE' has been detected.
8363 */
8364 SKIP(9);
8365
8366 SKIP_BLANKS;
8367
8368 /*
8369 * Parse the DOCTYPE name.
8370 */
8371 name = xmlParseName(ctxt);
8372 if (name == NULL) {
8373 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8374 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8375 }
8376 ctxt->intSubName = name;
8377
8378 SKIP_BLANKS;
8379
8380 /*
8381 * Check for SystemID and ExternalID
8382 */
8383 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8384
8385 if ((URI != NULL) || (ExternalID != NULL)) {
8386 ctxt->hasExternalSubset = 1;
8387 }
8388 ctxt->extSubURI = URI;
8389 ctxt->extSubSystem = ExternalID;
8390
8391 SKIP_BLANKS;
8392
8393 /*
8394 * Create and update the internal subset.
8395 */
8396 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8397 (!ctxt->disableSAX))
8398 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8399 if (ctxt->instate == XML_PARSER_EOF)
8400 return;
8401
8402 /*
8403 * Is there any internal subset declarations ?
8404 * they are handled separately in xmlParseInternalSubset()
8405 */
8406 if (RAW == '[')
8407 return;
8408
8409 /*
8410 * We should be at the end of the DOCTYPE declaration.
8411 */
8412 if (RAW != '>') {
8413 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8414 }
8415 NEXT;
8416 }
8417
8418 /**
8419 * xmlParseInternalSubset:
8420 * @ctxt: an XML parser context
8421 *
8422 * parse the internal subset declaration
8423 *
8424 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8425 */
8426
8427 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8428 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8429 /*
8430 * Is there any DTD definition ?
8431 */
8432 if (RAW == '[') {
8433 ctxt->instate = XML_PARSER_DTD;
8434 NEXT;
8435 /*
8436 * Parse the succession of Markup declarations and
8437 * PEReferences.
8438 * Subsequence (markupdecl | PEReference | S)*
8439 */
8440 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8441 const xmlChar *check = CUR_PTR;
8442 unsigned int cons = ctxt->input->consumed;
8443
8444 SKIP_BLANKS;
8445 xmlParseMarkupDecl(ctxt);
8446 xmlParsePEReference(ctxt);
8447
8448 /*
8449 * Pop-up of finished entities.
8450 */
8451 while ((RAW == 0) && (ctxt->inputNr > 1))
8452 xmlPopInput(ctxt);
8453
8454 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8455 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8456 "xmlParseInternalSubset: error detected in Markup declaration\n");
8457 break;
8458 }
8459 }
8460 if (RAW == ']') {
8461 NEXT;
8462 SKIP_BLANKS;
8463 }
8464 }
8465
8466 /*
8467 * We should be at the end of the DOCTYPE declaration.
8468 */
8469 if (RAW != '>') {
8470 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471 }
8472 NEXT;
8473 }
8474
8475 #ifdef LIBXML_SAX1_ENABLED
8476 /**
8477 * xmlParseAttribute:
8478 * @ctxt: an XML parser context
8479 * @value: a xmlChar ** used to store the value of the attribute
8480 *
8481 * parse an attribute
8482 *
8483 * [41] Attribute ::= Name Eq AttValue
8484 *
8485 * [ WFC: No External Entity References ]
8486 * Attribute values cannot contain direct or indirect entity references
8487 * to external entities.
8488 *
8489 * [ WFC: No < in Attribute Values ]
8490 * The replacement text of any entity referred to directly or indirectly in
8491 * an attribute value (other than "<") must not contain a <.
8492 *
8493 * [ VC: Attribute Value Type ]
8494 * The attribute must have been declared; the value must be of the type
8495 * declared for it.
8496 *
8497 * [25] Eq ::= S? '=' S?
8498 *
8499 * With namespace:
8500 *
8501 * [NS 11] Attribute ::= QName Eq AttValue
8502 *
8503 * Also the case QName == xmlns:??? is handled independently as a namespace
8504 * definition.
8505 *
8506 * Returns the attribute name, and the value in *value.
8507 */
8508
8509 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8510 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8511 const xmlChar *name;
8512 xmlChar *val;
8513
8514 *value = NULL;
8515 GROW;
8516 name = xmlParseName(ctxt);
8517 if (name == NULL) {
8518 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8519 "error parsing attribute name\n");
8520 return(NULL);
8521 }
8522
8523 /*
8524 * read the value
8525 */
8526 SKIP_BLANKS;
8527 if (RAW == '=') {
8528 NEXT;
8529 SKIP_BLANKS;
8530 val = xmlParseAttValue(ctxt);
8531 ctxt->instate = XML_PARSER_CONTENT;
8532 } else {
8533 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8534 "Specification mandate value for attribute %s\n", name);
8535 return(NULL);
8536 }
8537
8538 /*
8539 * Check that xml:lang conforms to the specification
8540 * No more registered as an error, just generate a warning now
8541 * since this was deprecated in XML second edition
8542 */
8543 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8544 if (!xmlCheckLanguageID(val)) {
8545 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8546 "Malformed value for xml:lang : %s\n",
8547 val, NULL);
8548 }
8549 }
8550
8551 /*
8552 * Check that xml:space conforms to the specification
8553 */
8554 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8555 if (xmlStrEqual(val, BAD_CAST "default"))
8556 *(ctxt->space) = 0;
8557 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8558 *(ctxt->space) = 1;
8559 else {
8560 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8561 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8562 val, NULL);
8563 }
8564 }
8565
8566 *value = val;
8567 return(name);
8568 }
8569
8570 /**
8571 * xmlParseStartTag:
8572 * @ctxt: an XML parser context
8573 *
8574 * parse a start of tag either for rule element or
8575 * EmptyElement. In both case we don't parse the tag closing chars.
8576 *
8577 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8578 *
8579 * [ WFC: Unique Att Spec ]
8580 * No attribute name may appear more than once in the same start-tag or
8581 * empty-element tag.
8582 *
8583 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8584 *
8585 * [ WFC: Unique Att Spec ]
8586 * No attribute name may appear more than once in the same start-tag or
8587 * empty-element tag.
8588 *
8589 * With namespace:
8590 *
8591 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8592 *
8593 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8594 *
8595 * Returns the element name parsed
8596 */
8597
8598 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8599 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8600 const xmlChar *name;
8601 const xmlChar *attname;
8602 xmlChar *attvalue;
8603 const xmlChar **atts = ctxt->atts;
8604 int nbatts = 0;
8605 int maxatts = ctxt->maxatts;
8606 int i;
8607
8608 if (RAW != '<') return(NULL);
8609 NEXT1;
8610
8611 name = xmlParseName(ctxt);
8612 if (name == NULL) {
8613 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8614 "xmlParseStartTag: invalid element name\n");
8615 return(NULL);
8616 }
8617
8618 /*
8619 * Now parse the attributes, it ends up with the ending
8620 *
8621 * (S Attribute)* S?
8622 */
8623 SKIP_BLANKS;
8624 GROW;
8625
8626 while (((RAW != '>') &&
8627 ((RAW != '/') || (NXT(1) != '>')) &&
8628 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8629 const xmlChar *q = CUR_PTR;
8630 unsigned int cons = ctxt->input->consumed;
8631
8632 attname = xmlParseAttribute(ctxt, &attvalue);
8633 if ((attname != NULL) && (attvalue != NULL)) {
8634 /*
8635 * [ WFC: Unique Att Spec ]
8636 * No attribute name may appear more than once in the same
8637 * start-tag or empty-element tag.
8638 */
8639 for (i = 0; i < nbatts;i += 2) {
8640 if (xmlStrEqual(atts[i], attname)) {
8641 xmlErrAttributeDup(ctxt, NULL, attname);
8642 xmlFree(attvalue);
8643 goto failed;
8644 }
8645 }
8646 /*
8647 * Add the pair to atts
8648 */
8649 if (atts == NULL) {
8650 maxatts = 22; /* allow for 10 attrs by default */
8651 atts = (const xmlChar **)
8652 xmlMalloc(maxatts * sizeof(xmlChar *));
8653 if (atts == NULL) {
8654 xmlErrMemory(ctxt, NULL);
8655 if (attvalue != NULL)
8656 xmlFree(attvalue);
8657 goto failed;
8658 }
8659 ctxt->atts = atts;
8660 ctxt->maxatts = maxatts;
8661 } else if (nbatts + 4 > maxatts) {
8662 const xmlChar **n;
8663
8664 maxatts *= 2;
8665 n = (const xmlChar **) xmlRealloc((void *) atts,
8666 maxatts * sizeof(const xmlChar *));
8667 if (n == NULL) {
8668 xmlErrMemory(ctxt, NULL);
8669 if (attvalue != NULL)
8670 xmlFree(attvalue);
8671 goto failed;
8672 }
8673 atts = n;
8674 ctxt->atts = atts;
8675 ctxt->maxatts = maxatts;
8676 }
8677 atts[nbatts++] = attname;
8678 atts[nbatts++] = attvalue;
8679 atts[nbatts] = NULL;
8680 atts[nbatts + 1] = NULL;
8681 } else {
8682 if (attvalue != NULL)
8683 xmlFree(attvalue);
8684 }
8685
8686 failed:
8687
8688 GROW
8689 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8690 break;
8691 if (!IS_BLANK_CH(RAW)) {
8692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8693 "attributes construct error\n");
8694 }
8695 SKIP_BLANKS;
8696 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8697 (attname == NULL) && (attvalue == NULL)) {
8698 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8699 "xmlParseStartTag: problem parsing attributes\n");
8700 break;
8701 }
8702 SHRINK;
8703 GROW;
8704 }
8705
8706 /*
8707 * SAX: Start of Element !
8708 */
8709 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8710 (!ctxt->disableSAX)) {
8711 if (nbatts > 0)
8712 ctxt->sax->startElement(ctxt->userData, name, atts);
8713 else
8714 ctxt->sax->startElement(ctxt->userData, name, NULL);
8715 }
8716
8717 if (atts != NULL) {
8718 /* Free only the content strings */
8719 for (i = 1;i < nbatts;i+=2)
8720 if (atts[i] != NULL)
8721 xmlFree((xmlChar *) atts[i]);
8722 }
8723 return(name);
8724 }
8725
8726 /**
8727 * xmlParseEndTag1:
8728 * @ctxt: an XML parser context
8729 * @line: line of the start tag
8730 * @nsNr: number of namespaces on the start tag
8731 *
8732 * parse an end of tag
8733 *
8734 * [42] ETag ::= '</' Name S? '>'
8735 *
8736 * With namespace
8737 *
8738 * [NS 9] ETag ::= '</' QName S? '>'
8739 */
8740
8741 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8742 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8743 const xmlChar *name;
8744
8745 GROW;
8746 if ((RAW != '<') || (NXT(1) != '/')) {
8747 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8748 "xmlParseEndTag: '</' not found\n");
8749 return;
8750 }
8751 SKIP(2);
8752
8753 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8754
8755 /*
8756 * We should definitely be at the ending "S? '>'" part
8757 */
8758 GROW;
8759 SKIP_BLANKS;
8760 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8761 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8762 } else
8763 NEXT1;
8764
8765 /*
8766 * [ WFC: Element Type Match ]
8767 * The Name in an element's end-tag must match the element type in the
8768 * start-tag.
8769 *
8770 */
8771 if (name != (xmlChar*)1) {
8772 if (name == NULL) name = BAD_CAST "unparseable";
8773 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8774 "Opening and ending tag mismatch: %s line %d and %s\n",
8775 ctxt->name, line, name);
8776 }
8777
8778 /*
8779 * SAX: End of Tag
8780 */
8781 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8782 (!ctxt->disableSAX))
8783 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8784
8785 namePop(ctxt);
8786 spacePop(ctxt);
8787 return;
8788 }
8789
8790 /**
8791 * xmlParseEndTag:
8792 * @ctxt: an XML parser context
8793 *
8794 * parse an end of tag
8795 *
8796 * [42] ETag ::= '</' Name S? '>'
8797 *
8798 * With namespace
8799 *
8800 * [NS 9] ETag ::= '</' QName S? '>'
8801 */
8802
8803 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8804 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8805 xmlParseEndTag1(ctxt, 0);
8806 }
8807 #endif /* LIBXML_SAX1_ENABLED */
8808
8809 /************************************************************************
8810 * *
8811 * SAX 2 specific operations *
8812 * *
8813 ************************************************************************/
8814
8815 /*
8816 * xmlGetNamespace:
8817 * @ctxt: an XML parser context
8818 * @prefix: the prefix to lookup
8819 *
8820 * Lookup the namespace name for the @prefix (which ca be NULL)
8821 * The prefix must come from the @ctxt->dict dictionnary
8822 *
8823 * Returns the namespace name or NULL if not bound
8824 */
8825 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8826 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8827 int i;
8828
8829 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8830 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8831 if (ctxt->nsTab[i] == prefix) {
8832 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8833 return(NULL);
8834 return(ctxt->nsTab[i + 1]);
8835 }
8836 return(NULL);
8837 }
8838
8839 /**
8840 * xmlParseQName:
8841 * @ctxt: an XML parser context
8842 * @prefix: pointer to store the prefix part
8843 *
8844 * parse an XML Namespace QName
8845 *
8846 * [6] QName ::= (Prefix ':')? LocalPart
8847 * [7] Prefix ::= NCName
8848 * [8] LocalPart ::= NCName
8849 *
8850 * Returns the Name parsed or NULL
8851 */
8852
8853 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8854 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8855 const xmlChar *l, *p;
8856
8857 GROW;
8858
8859 l = xmlParseNCName(ctxt);
8860 if (l == NULL) {
8861 if (CUR == ':') {
8862 l = xmlParseName(ctxt);
8863 if (l != NULL) {
8864 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8865 "Failed to parse QName '%s'\n", l, NULL, NULL);
8866 *prefix = NULL;
8867 return(l);
8868 }
8869 }
8870 return(NULL);
8871 }
8872 if (CUR == ':') {
8873 NEXT;
8874 p = l;
8875 l = xmlParseNCName(ctxt);
8876 if (l == NULL) {
8877 xmlChar *tmp;
8878
8879 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8880 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8881 l = xmlParseNmtoken(ctxt);
8882 if (l == NULL)
8883 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8884 else {
8885 tmp = xmlBuildQName(l, p, NULL, 0);
8886 xmlFree((char *)l);
8887 }
8888 p = xmlDictLookup(ctxt->dict, tmp, -1);
8889 if (tmp != NULL) xmlFree(tmp);
8890 *prefix = NULL;
8891 return(p);
8892 }
8893 if (CUR == ':') {
8894 xmlChar *tmp;
8895
8896 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8897 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8898 NEXT;
8899 tmp = (xmlChar *) xmlParseName(ctxt);
8900 if (tmp != NULL) {
8901 tmp = xmlBuildQName(tmp, l, NULL, 0);
8902 l = xmlDictLookup(ctxt->dict, tmp, -1);
8903 if (tmp != NULL) xmlFree(tmp);
8904 *prefix = p;
8905 return(l);
8906 }
8907 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8908 l = xmlDictLookup(ctxt->dict, tmp, -1);
8909 if (tmp != NULL) xmlFree(tmp);
8910 *prefix = p;
8911 return(l);
8912 }
8913 *prefix = p;
8914 } else
8915 *prefix = NULL;
8916 return(l);
8917 }
8918
8919 /**
8920 * xmlParseQNameAndCompare:
8921 * @ctxt: an XML parser context
8922 * @name: the localname
8923 * @prefix: the prefix, if any.
8924 *
8925 * parse an XML name and compares for match
8926 * (specialized for endtag parsing)
8927 *
8928 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8929 * and the name for mismatch
8930 */
8931
8932 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8933 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8934 xmlChar const *prefix) {
8935 const xmlChar *cmp;
8936 const xmlChar *in;
8937 const xmlChar *ret;
8938 const xmlChar *prefix2;
8939
8940 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8941
8942 GROW;
8943 in = ctxt->input->cur;
8944
8945 cmp = prefix;
8946 while (*in != 0 && *in == *cmp) {
8947 ++in;
8948 ++cmp;
8949 }
8950 if ((*cmp == 0) && (*in == ':')) {
8951 in++;
8952 cmp = name;
8953 while (*in != 0 && *in == *cmp) {
8954 ++in;
8955 ++cmp;
8956 }
8957 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8958 /* success */
8959 ctxt->input->cur = in;
8960 return((const xmlChar*) 1);
8961 }
8962 }
8963 /*
8964 * all strings coms from the dictionary, equality can be done directly
8965 */
8966 ret = xmlParseQName (ctxt, &prefix2);
8967 if ((ret == name) && (prefix == prefix2))
8968 return((const xmlChar*) 1);
8969 return ret;
8970 }
8971
8972 /**
8973 * xmlParseAttValueInternal:
8974 * @ctxt: an XML parser context
8975 * @len: attribute len result
8976 * @alloc: whether the attribute was reallocated as a new string
8977 * @normalize: if 1 then further non-CDATA normalization must be done
8978 *
8979 * parse a value for an attribute.
8980 * NOTE: if no normalization is needed, the routine will return pointers
8981 * directly from the data buffer.
8982 *
8983 * 3.3.3 Attribute-Value Normalization:
8984 * Before the value of an attribute is passed to the application or
8985 * checked for validity, the XML processor must normalize it as follows:
8986 * - a character reference is processed by appending the referenced
8987 * character to the attribute value
8988 * - an entity reference is processed by recursively processing the
8989 * replacement text of the entity
8990 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8991 * appending #x20 to the normalized value, except that only a single
8992 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8993 * parsed entity or the literal entity value of an internal parsed entity
8994 * - other characters are processed by appending them to the normalized value
8995 * If the declared value is not CDATA, then the XML processor must further
8996 * process the normalized attribute value by discarding any leading and
8997 * trailing space (#x20) characters, and by replacing sequences of space
8998 * (#x20) characters by a single space (#x20) character.
8999 * All attributes for which no declaration has been read should be treated
9000 * by a non-validating parser as if declared CDATA.
9001 *
9002 * Returns the AttValue parsed or NULL. The value has to be freed by the
9003 * caller if it was copied, this can be detected by val[*len] == 0.
9004 */
9005
9006 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)9007 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9008 int normalize)
9009 {
9010 xmlChar limit = 0;
9011 const xmlChar *in = NULL, *start, *end, *last;
9012 xmlChar *ret = NULL;
9013 int line, col;
9014
9015 GROW;
9016 in = (xmlChar *) CUR_PTR;
9017 line = ctxt->input->line;
9018 col = ctxt->input->col;
9019 if (*in != '"' && *in != '\'') {
9020 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9021 return (NULL);
9022 }
9023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9024
9025 /*
9026 * try to handle in this routine the most common case where no
9027 * allocation of a new string is required and where content is
9028 * pure ASCII.
9029 */
9030 limit = *in++;
9031 col++;
9032 end = ctxt->input->end;
9033 start = in;
9034 if (in >= end) {
9035 const xmlChar *oldbase = ctxt->input->base;
9036 GROW;
9037 if (oldbase != ctxt->input->base) {
9038 long delta = ctxt->input->base - oldbase;
9039 start = start + delta;
9040 in = in + delta;
9041 }
9042 end = ctxt->input->end;
9043 }
9044 if (normalize) {
9045 /*
9046 * Skip any leading spaces
9047 */
9048 while ((in < end) && (*in != limit) &&
9049 ((*in == 0x20) || (*in == 0x9) ||
9050 (*in == 0xA) || (*in == 0xD))) {
9051 if (*in == 0xA) {
9052 line++; col = 1;
9053 } else {
9054 col++;
9055 }
9056 in++;
9057 start = in;
9058 if (in >= end) {
9059 const xmlChar *oldbase = ctxt->input->base;
9060 GROW;
9061 if (ctxt->instate == XML_PARSER_EOF)
9062 return(NULL);
9063 if (oldbase != ctxt->input->base) {
9064 long delta = ctxt->input->base - oldbase;
9065 start = start + delta;
9066 in = in + delta;
9067 }
9068 end = ctxt->input->end;
9069 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9070 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9071 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9072 "AttValue length too long\n");
9073 return(NULL);
9074 }
9075 }
9076 }
9077 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9078 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9079 col++;
9080 if ((*in++ == 0x20) && (*in == 0x20)) break;
9081 if (in >= end) {
9082 const xmlChar *oldbase = ctxt->input->base;
9083 GROW;
9084 if (ctxt->instate == XML_PARSER_EOF)
9085 return(NULL);
9086 if (oldbase != ctxt->input->base) {
9087 long delta = ctxt->input->base - oldbase;
9088 start = start + delta;
9089 in = in + delta;
9090 }
9091 end = ctxt->input->end;
9092 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9093 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9094 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9095 "AttValue length too long\n");
9096 return(NULL);
9097 }
9098 }
9099 }
9100 last = in;
9101 /*
9102 * skip the trailing blanks
9103 */
9104 while ((last[-1] == 0x20) && (last > start)) last--;
9105 while ((in < end) && (*in != limit) &&
9106 ((*in == 0x20) || (*in == 0x9) ||
9107 (*in == 0xA) || (*in == 0xD))) {
9108 if (*in == 0xA) {
9109 line++, col = 1;
9110 } else {
9111 col++;
9112 }
9113 in++;
9114 if (in >= end) {
9115 const xmlChar *oldbase = ctxt->input->base;
9116 GROW;
9117 if (ctxt->instate == XML_PARSER_EOF)
9118 return(NULL);
9119 if (oldbase != ctxt->input->base) {
9120 long delta = ctxt->input->base - oldbase;
9121 start = start + delta;
9122 in = in + delta;
9123 last = last + delta;
9124 }
9125 end = ctxt->input->end;
9126 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9127 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9128 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129 "AttValue length too long\n");
9130 return(NULL);
9131 }
9132 }
9133 }
9134 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9135 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9136 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9137 "AttValue length too long\n");
9138 return(NULL);
9139 }
9140 if (*in != limit) goto need_complex;
9141 } else {
9142 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9143 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9144 in++;
9145 col++;
9146 if (in >= end) {
9147 const xmlChar *oldbase = ctxt->input->base;
9148 GROW;
9149 if (ctxt->instate == XML_PARSER_EOF)
9150 return(NULL);
9151 if (oldbase != ctxt->input->base) {
9152 long delta = ctxt->input->base - oldbase;
9153 start = start + delta;
9154 in = in + delta;
9155 }
9156 end = ctxt->input->end;
9157 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9158 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9159 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160 "AttValue length too long\n");
9161 return(NULL);
9162 }
9163 }
9164 }
9165 last = in;
9166 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9167 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9168 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9169 "AttValue length too long\n");
9170 return(NULL);
9171 }
9172 if (*in != limit) goto need_complex;
9173 }
9174 in++;
9175 col++;
9176 if (len != NULL) {
9177 *len = last - start;
9178 ret = (xmlChar *) start;
9179 } else {
9180 if (alloc) *alloc = 1;
9181 ret = xmlStrndup(start, last - start);
9182 }
9183 CUR_PTR = in;
9184 ctxt->input->line = line;
9185 ctxt->input->col = col;
9186 if (alloc) *alloc = 0;
9187 return ret;
9188 need_complex:
9189 if (alloc) *alloc = 1;
9190 return xmlParseAttValueComplex(ctxt, len, normalize);
9191 }
9192
9193 /**
9194 * xmlParseAttribute2:
9195 * @ctxt: an XML parser context
9196 * @pref: the element prefix
9197 * @elem: the element name
9198 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9199 * @value: a xmlChar ** used to store the value of the attribute
9200 * @len: an int * to save the length of the attribute
9201 * @alloc: an int * to indicate if the attribute was allocated
9202 *
9203 * parse an attribute in the new SAX2 framework.
9204 *
9205 * Returns the attribute name, and the value in *value, .
9206 */
9207
9208 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9209 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9210 const xmlChar * pref, const xmlChar * elem,
9211 const xmlChar ** prefix, xmlChar ** value,
9212 int *len, int *alloc)
9213 {
9214 const xmlChar *name;
9215 xmlChar *val, *internal_val = NULL;
9216 int normalize = 0;
9217
9218 *value = NULL;
9219 GROW;
9220 name = xmlParseQName(ctxt, prefix);
9221 if (name == NULL) {
9222 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9223 "error parsing attribute name\n");
9224 return (NULL);
9225 }
9226
9227 /*
9228 * get the type if needed
9229 */
9230 if (ctxt->attsSpecial != NULL) {
9231 int type;
9232
9233 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9234 pref, elem, *prefix, name);
9235 if (type != 0)
9236 normalize = 1;
9237 }
9238
9239 /*
9240 * read the value
9241 */
9242 SKIP_BLANKS;
9243 if (RAW == '=') {
9244 NEXT;
9245 SKIP_BLANKS;
9246 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9247 if (normalize) {
9248 /*
9249 * Sometimes a second normalisation pass for spaces is needed
9250 * but that only happens if charrefs or entities refernces
9251 * have been used in the attribute value, i.e. the attribute
9252 * value have been extracted in an allocated string already.
9253 */
9254 if (*alloc) {
9255 const xmlChar *val2;
9256
9257 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9258 if ((val2 != NULL) && (val2 != val)) {
9259 xmlFree(val);
9260 val = (xmlChar *) val2;
9261 }
9262 }
9263 }
9264 ctxt->instate = XML_PARSER_CONTENT;
9265 } else {
9266 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9267 "Specification mandate value for attribute %s\n",
9268 name);
9269 return (NULL);
9270 }
9271
9272 if (*prefix == ctxt->str_xml) {
9273 /*
9274 * Check that xml:lang conforms to the specification
9275 * No more registered as an error, just generate a warning now
9276 * since this was deprecated in XML second edition
9277 */
9278 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9279 internal_val = xmlStrndup(val, *len);
9280 if (!xmlCheckLanguageID(internal_val)) {
9281 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9282 "Malformed value for xml:lang : %s\n",
9283 internal_val, NULL);
9284 }
9285 }
9286
9287 /*
9288 * Check that xml:space conforms to the specification
9289 */
9290 if (xmlStrEqual(name, BAD_CAST "space")) {
9291 internal_val = xmlStrndup(val, *len);
9292 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9293 *(ctxt->space) = 0;
9294 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9295 *(ctxt->space) = 1;
9296 else {
9297 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9298 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9299 internal_val, NULL);
9300 }
9301 }
9302 if (internal_val) {
9303 xmlFree(internal_val);
9304 }
9305 }
9306
9307 *value = val;
9308 return (name);
9309 }
9310 /**
9311 * xmlParseStartTag2:
9312 * @ctxt: an XML parser context
9313 *
9314 * parse a start of tag either for rule element or
9315 * EmptyElement. In both case we don't parse the tag closing chars.
9316 * This routine is called when running SAX2 parsing
9317 *
9318 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9319 *
9320 * [ WFC: Unique Att Spec ]
9321 * No attribute name may appear more than once in the same start-tag or
9322 * empty-element tag.
9323 *
9324 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9325 *
9326 * [ WFC: Unique Att Spec ]
9327 * No attribute name may appear more than once in the same start-tag or
9328 * empty-element tag.
9329 *
9330 * With namespace:
9331 *
9332 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9333 *
9334 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9335 *
9336 * Returns the element name parsed
9337 */
9338
9339 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9340 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9341 const xmlChar **URI, int *tlen) {
9342 const xmlChar *localname;
9343 const xmlChar *prefix;
9344 const xmlChar *attname;
9345 const xmlChar *aprefix;
9346 const xmlChar *nsname;
9347 xmlChar *attvalue;
9348 const xmlChar **atts = ctxt->atts;
9349 int maxatts = ctxt->maxatts;
9350 int nratts, nbatts, nbdef;
9351 int i, j, nbNs, attval, oldline, oldcol, inputNr;
9352 const xmlChar *base;
9353 unsigned long cur;
9354 int nsNr = ctxt->nsNr;
9355
9356 if (RAW != '<') return(NULL);
9357 NEXT1;
9358
9359 /*
9360 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9361 * point since the attribute values may be stored as pointers to
9362 * the buffer and calling SHRINK would destroy them !
9363 * The Shrinking is only possible once the full set of attribute
9364 * callbacks have been done.
9365 */
9366 reparse:
9367 SHRINK;
9368 base = ctxt->input->base;
9369 cur = ctxt->input->cur - ctxt->input->base;
9370 inputNr = ctxt->inputNr;
9371 oldline = ctxt->input->line;
9372 oldcol = ctxt->input->col;
9373 nbatts = 0;
9374 nratts = 0;
9375 nbdef = 0;
9376 nbNs = 0;
9377 attval = 0;
9378 /* Forget any namespaces added during an earlier parse of this element. */
9379 ctxt->nsNr = nsNr;
9380
9381 localname = xmlParseQName(ctxt, &prefix);
9382 if (localname == NULL) {
9383 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9384 "StartTag: invalid element name\n");
9385 return(NULL);
9386 }
9387 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9388
9389 /*
9390 * Now parse the attributes, it ends up with the ending
9391 *
9392 * (S Attribute)* S?
9393 */
9394 SKIP_BLANKS;
9395 GROW;
9396 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9397 goto base_changed;
9398
9399 while (((RAW != '>') &&
9400 ((RAW != '/') || (NXT(1) != '>')) &&
9401 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9402 const xmlChar *q = CUR_PTR;
9403 unsigned int cons = ctxt->input->consumed;
9404 int len = -1, alloc = 0;
9405
9406 attname = xmlParseAttribute2(ctxt, prefix, localname,
9407 &aprefix, &attvalue, &len, &alloc);
9408 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
9409 if ((attvalue != NULL) && (alloc != 0))
9410 xmlFree(attvalue);
9411 attvalue = NULL;
9412 goto base_changed;
9413 }
9414 if ((attname != NULL) && (attvalue != NULL)) {
9415 if (len < 0) len = xmlStrlen(attvalue);
9416 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9417 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9418 xmlURIPtr uri;
9419
9420 if (URL == NULL) {
9421 xmlErrMemory(ctxt, "dictionary allocation failure");
9422 if ((attvalue != NULL) && (alloc != 0))
9423 xmlFree(attvalue);
9424 return(NULL);
9425 }
9426 if (*URL != 0) {
9427 uri = xmlParseURI((const char *) URL);
9428 if (uri == NULL) {
9429 xmlNsErr(ctxt, XML_WAR_NS_URI,
9430 "xmlns: '%s' is not a valid URI\n",
9431 URL, NULL, NULL);
9432 } else {
9433 if (uri->scheme == NULL) {
9434 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9435 "xmlns: URI %s is not absolute\n",
9436 URL, NULL, NULL);
9437 }
9438 xmlFreeURI(uri);
9439 }
9440 if (URL == ctxt->str_xml_ns) {
9441 if (attname != ctxt->str_xml) {
9442 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9443 "xml namespace URI cannot be the default namespace\n",
9444 NULL, NULL, NULL);
9445 }
9446 goto skip_default_ns;
9447 }
9448 if ((len == 29) &&
9449 (xmlStrEqual(URL,
9450 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9451 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9452 "reuse of the xmlns namespace name is forbidden\n",
9453 NULL, NULL, NULL);
9454 goto skip_default_ns;
9455 }
9456 }
9457 /*
9458 * check that it's not a defined namespace
9459 */
9460 for (j = 1;j <= nbNs;j++)
9461 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9462 break;
9463 if (j <= nbNs)
9464 xmlErrAttributeDup(ctxt, NULL, attname);
9465 else
9466 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9467 skip_default_ns:
9468 if (alloc != 0) xmlFree(attvalue);
9469 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9470 break;
9471 if (!IS_BLANK_CH(RAW)) {
9472 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9473 "attributes construct error\n");
9474 break;
9475 }
9476 SKIP_BLANKS;
9477 continue;
9478 }
9479 if (aprefix == ctxt->str_xmlns) {
9480 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9481 xmlURIPtr uri;
9482
9483 if (attname == ctxt->str_xml) {
9484 if (URL != ctxt->str_xml_ns) {
9485 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9486 "xml namespace prefix mapped to wrong URI\n",
9487 NULL, NULL, NULL);
9488 }
9489 /*
9490 * Do not keep a namespace definition node
9491 */
9492 goto skip_ns;
9493 }
9494 if (URL == ctxt->str_xml_ns) {
9495 if (attname != ctxt->str_xml) {
9496 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9497 "xml namespace URI mapped to wrong prefix\n",
9498 NULL, NULL, NULL);
9499 }
9500 goto skip_ns;
9501 }
9502 if (attname == ctxt->str_xmlns) {
9503 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9504 "redefinition of the xmlns prefix is forbidden\n",
9505 NULL, NULL, NULL);
9506 goto skip_ns;
9507 }
9508 if ((len == 29) &&
9509 (xmlStrEqual(URL,
9510 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9511 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9512 "reuse of the xmlns namespace name is forbidden\n",
9513 NULL, NULL, NULL);
9514 goto skip_ns;
9515 }
9516 if ((URL == NULL) || (URL[0] == 0)) {
9517 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9518 "xmlns:%s: Empty XML namespace is not allowed\n",
9519 attname, NULL, NULL);
9520 goto skip_ns;
9521 } else {
9522 uri = xmlParseURI((const char *) URL);
9523 if (uri == NULL) {
9524 xmlNsErr(ctxt, XML_WAR_NS_URI,
9525 "xmlns:%s: '%s' is not a valid URI\n",
9526 attname, URL, NULL);
9527 } else {
9528 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9529 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9530 "xmlns:%s: URI %s is not absolute\n",
9531 attname, URL, NULL);
9532 }
9533 xmlFreeURI(uri);
9534 }
9535 }
9536
9537 /*
9538 * check that it's not a defined namespace
9539 */
9540 for (j = 1;j <= nbNs;j++)
9541 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9542 break;
9543 if (j <= nbNs)
9544 xmlErrAttributeDup(ctxt, aprefix, attname);
9545 else
9546 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9547 skip_ns:
9548 if (alloc != 0) xmlFree(attvalue);
9549 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9550 break;
9551 if (!IS_BLANK_CH(RAW)) {
9552 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9553 "attributes construct error\n");
9554 break;
9555 }
9556 SKIP_BLANKS;
9557 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9558 goto base_changed;
9559 continue;
9560 }
9561
9562 /*
9563 * Add the pair to atts
9564 */
9565 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9566 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9567 if (attvalue[len] == 0)
9568 xmlFree(attvalue);
9569 goto failed;
9570 }
9571 maxatts = ctxt->maxatts;
9572 atts = ctxt->atts;
9573 }
9574 ctxt->attallocs[nratts++] = alloc;
9575 atts[nbatts++] = attname;
9576 atts[nbatts++] = aprefix;
9577 atts[nbatts++] = NULL; /* the URI will be fetched later */
9578 atts[nbatts++] = attvalue;
9579 attvalue += len;
9580 atts[nbatts++] = attvalue;
9581 /*
9582 * tag if some deallocation is needed
9583 */
9584 if (alloc != 0) attval = 1;
9585 } else {
9586 if ((attvalue != NULL) && (attvalue[len] == 0))
9587 xmlFree(attvalue);
9588 }
9589
9590 failed:
9591
9592 GROW
9593 if (ctxt->instate == XML_PARSER_EOF)
9594 break;
9595 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9596 goto base_changed;
9597 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9598 break;
9599 if (!IS_BLANK_CH(RAW)) {
9600 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9601 "attributes construct error\n");
9602 break;
9603 }
9604 SKIP_BLANKS;
9605 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9606 (attname == NULL) && (attvalue == NULL)) {
9607 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9608 "xmlParseStartTag: problem parsing attributes\n");
9609 break;
9610 }
9611 GROW;
9612 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9613 goto base_changed;
9614 }
9615
9616 /*
9617 * The attributes defaulting
9618 */
9619 if (ctxt->attsDefault != NULL) {
9620 xmlDefAttrsPtr defaults;
9621
9622 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9623 if (defaults != NULL) {
9624 for (i = 0;i < defaults->nbAttrs;i++) {
9625 attname = defaults->values[5 * i];
9626 aprefix = defaults->values[5 * i + 1];
9627
9628 /*
9629 * special work for namespaces defaulted defs
9630 */
9631 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9632 /*
9633 * check that it's not a defined namespace
9634 */
9635 for (j = 1;j <= nbNs;j++)
9636 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9637 break;
9638 if (j <= nbNs) continue;
9639
9640 nsname = xmlGetNamespace(ctxt, NULL);
9641 if (nsname != defaults->values[5 * i + 2]) {
9642 if (nsPush(ctxt, NULL,
9643 defaults->values[5 * i + 2]) > 0)
9644 nbNs++;
9645 }
9646 } else if (aprefix == ctxt->str_xmlns) {
9647 /*
9648 * check that it's not a defined namespace
9649 */
9650 for (j = 1;j <= nbNs;j++)
9651 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9652 break;
9653 if (j <= nbNs) continue;
9654
9655 nsname = xmlGetNamespace(ctxt, attname);
9656 if (nsname != defaults->values[2]) {
9657 if (nsPush(ctxt, attname,
9658 defaults->values[5 * i + 2]) > 0)
9659 nbNs++;
9660 }
9661 } else {
9662 /*
9663 * check that it's not a defined attribute
9664 */
9665 for (j = 0;j < nbatts;j+=5) {
9666 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9667 break;
9668 }
9669 if (j < nbatts) continue;
9670
9671 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9672 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9673 return(NULL);
9674 }
9675 maxatts = ctxt->maxatts;
9676 atts = ctxt->atts;
9677 }
9678 atts[nbatts++] = attname;
9679 atts[nbatts++] = aprefix;
9680 if (aprefix == NULL)
9681 atts[nbatts++] = NULL;
9682 else
9683 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9684 atts[nbatts++] = defaults->values[5 * i + 2];
9685 atts[nbatts++] = defaults->values[5 * i + 3];
9686 if ((ctxt->standalone == 1) &&
9687 (defaults->values[5 * i + 4] != NULL)) {
9688 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9689 "standalone: attribute %s on %s defaulted from external subset\n",
9690 attname, localname);
9691 }
9692 nbdef++;
9693 }
9694 }
9695 }
9696 }
9697
9698 /*
9699 * The attributes checkings
9700 */
9701 for (i = 0; i < nbatts;i += 5) {
9702 /*
9703 * The default namespace does not apply to attribute names.
9704 */
9705 if (atts[i + 1] != NULL) {
9706 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9707 if (nsname == NULL) {
9708 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9709 "Namespace prefix %s for %s on %s is not defined\n",
9710 atts[i + 1], atts[i], localname);
9711 }
9712 atts[i + 2] = nsname;
9713 } else
9714 nsname = NULL;
9715 /*
9716 * [ WFC: Unique Att Spec ]
9717 * No attribute name may appear more than once in the same
9718 * start-tag or empty-element tag.
9719 * As extended by the Namespace in XML REC.
9720 */
9721 for (j = 0; j < i;j += 5) {
9722 if (atts[i] == atts[j]) {
9723 if (atts[i+1] == atts[j+1]) {
9724 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9725 break;
9726 }
9727 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9728 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9729 "Namespaced Attribute %s in '%s' redefined\n",
9730 atts[i], nsname, NULL);
9731 break;
9732 }
9733 }
9734 }
9735 }
9736
9737 nsname = xmlGetNamespace(ctxt, prefix);
9738 if ((prefix != NULL) && (nsname == NULL)) {
9739 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9740 "Namespace prefix %s on %s is not defined\n",
9741 prefix, localname, NULL);
9742 }
9743 *pref = prefix;
9744 *URI = nsname;
9745
9746 /*
9747 * SAX: Start of Element !
9748 */
9749 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9750 (!ctxt->disableSAX)) {
9751 if (nbNs > 0)
9752 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9753 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9754 nbatts / 5, nbdef, atts);
9755 else
9756 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9757 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9758 }
9759
9760 /*
9761 * Free up attribute allocated strings if needed
9762 */
9763 if (attval != 0) {
9764 for (i = 3,j = 0; j < nratts;i += 5,j++)
9765 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9766 xmlFree((xmlChar *) atts[i]);
9767 }
9768
9769 return(localname);
9770
9771 base_changed:
9772 /*
9773 * the attribute strings are valid iif the base didn't changed
9774 */
9775 if (attval != 0) {
9776 for (i = 3,j = 0; j < nratts;i += 5,j++)
9777 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9778 xmlFree((xmlChar *) atts[i]);
9779 }
9780
9781 /*
9782 * We can't switch from one entity to another in the middle
9783 * of a start tag
9784 */
9785 if (inputNr != ctxt->inputNr) {
9786 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9787 "Start tag doesn't start and stop in the same entity\n");
9788 return(NULL);
9789 }
9790
9791 ctxt->input->cur = ctxt->input->base + cur;
9792 ctxt->input->line = oldline;
9793 ctxt->input->col = oldcol;
9794 if (ctxt->wellFormed == 1) {
9795 goto reparse;
9796 }
9797 return(NULL);
9798 }
9799
9800 /**
9801 * xmlParseEndTag2:
9802 * @ctxt: an XML parser context
9803 * @line: line of the start tag
9804 * @nsNr: number of namespaces on the start tag
9805 *
9806 * parse an end of tag
9807 *
9808 * [42] ETag ::= '</' Name S? '>'
9809 *
9810 * With namespace
9811 *
9812 * [NS 9] ETag ::= '</' QName S? '>'
9813 */
9814
9815 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9816 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9817 const xmlChar *URI, int line, int nsNr, int tlen) {
9818 const xmlChar *name;
9819
9820 GROW;
9821 if ((RAW != '<') || (NXT(1) != '/')) {
9822 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9823 return;
9824 }
9825 SKIP(2);
9826
9827 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9828 if (ctxt->input->cur[tlen] == '>') {
9829 ctxt->input->cur += tlen + 1;
9830 ctxt->input->col += tlen + 1;
9831 goto done;
9832 }
9833 ctxt->input->cur += tlen;
9834 ctxt->input->col += tlen;
9835 name = (xmlChar*)1;
9836 } else {
9837 if (prefix == NULL)
9838 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9839 else
9840 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9841 }
9842
9843 /*
9844 * We should definitely be at the ending "S? '>'" part
9845 */
9846 GROW;
9847 if (ctxt->instate == XML_PARSER_EOF)
9848 return;
9849 SKIP_BLANKS;
9850 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9851 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9852 } else
9853 NEXT1;
9854
9855 /*
9856 * [ WFC: Element Type Match ]
9857 * The Name in an element's end-tag must match the element type in the
9858 * start-tag.
9859 *
9860 */
9861 if (name != (xmlChar*)1) {
9862 if (name == NULL) name = BAD_CAST "unparseable";
9863 if ((line == 0) && (ctxt->node != NULL))
9864 line = ctxt->node->line;
9865 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9866 "Opening and ending tag mismatch: %s line %d and %s\n",
9867 ctxt->name, line, name);
9868 }
9869
9870 /*
9871 * SAX: End of Tag
9872 */
9873 done:
9874 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9875 (!ctxt->disableSAX))
9876 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9877
9878 spacePop(ctxt);
9879 if (nsNr != 0)
9880 nsPop(ctxt, nsNr);
9881 return;
9882 }
9883
9884 /**
9885 * xmlParseCDSect:
9886 * @ctxt: an XML parser context
9887 *
9888 * Parse escaped pure raw content.
9889 *
9890 * [18] CDSect ::= CDStart CData CDEnd
9891 *
9892 * [19] CDStart ::= '<![CDATA['
9893 *
9894 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9895 *
9896 * [21] CDEnd ::= ']]>'
9897 */
9898 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9899 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9900 xmlChar *buf = NULL;
9901 int len = 0;
9902 int size = XML_PARSER_BUFFER_SIZE;
9903 int r, rl;
9904 int s, sl;
9905 int cur, l;
9906 int count = 0;
9907
9908 /* Check 2.6.0 was NXT(0) not RAW */
9909 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9910 SKIP(9);
9911 } else
9912 return;
9913
9914 ctxt->instate = XML_PARSER_CDATA_SECTION;
9915 r = CUR_CHAR(rl);
9916 if (!IS_CHAR(r)) {
9917 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9918 ctxt->instate = XML_PARSER_CONTENT;
9919 return;
9920 }
9921 NEXTL(rl);
9922 s = CUR_CHAR(sl);
9923 if (!IS_CHAR(s)) {
9924 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9925 ctxt->instate = XML_PARSER_CONTENT;
9926 return;
9927 }
9928 NEXTL(sl);
9929 cur = CUR_CHAR(l);
9930 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9931 if (buf == NULL) {
9932 xmlErrMemory(ctxt, NULL);
9933 return;
9934 }
9935 while (IS_CHAR(cur) &&
9936 ((r != ']') || (s != ']') || (cur != '>'))) {
9937 if (len + 5 >= size) {
9938 xmlChar *tmp;
9939
9940 if ((size > XML_MAX_TEXT_LENGTH) &&
9941 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9942 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9943 "CData section too big found", NULL);
9944 xmlFree (buf);
9945 return;
9946 }
9947 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9948 if (tmp == NULL) {
9949 xmlFree(buf);
9950 xmlErrMemory(ctxt, NULL);
9951 return;
9952 }
9953 buf = tmp;
9954 size *= 2;
9955 }
9956 COPY_BUF(rl,buf,len,r);
9957 r = s;
9958 rl = sl;
9959 s = cur;
9960 sl = l;
9961 count++;
9962 if (count > 50) {
9963 GROW;
9964 if (ctxt->instate == XML_PARSER_EOF) {
9965 xmlFree(buf);
9966 return;
9967 }
9968 count = 0;
9969 }
9970 NEXTL(l);
9971 cur = CUR_CHAR(l);
9972 }
9973 buf[len] = 0;
9974 ctxt->instate = XML_PARSER_CONTENT;
9975 if (cur != '>') {
9976 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9977 "CData section not finished\n%.50s\n", buf);
9978 xmlFree(buf);
9979 return;
9980 }
9981 NEXTL(l);
9982
9983 /*
9984 * OK the buffer is to be consumed as cdata.
9985 */
9986 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9987 if (ctxt->sax->cdataBlock != NULL)
9988 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9989 else if (ctxt->sax->characters != NULL)
9990 ctxt->sax->characters(ctxt->userData, buf, len);
9991 }
9992 xmlFree(buf);
9993 }
9994
9995 /**
9996 * xmlParseContent:
9997 * @ctxt: an XML parser context
9998 *
9999 * Parse a content:
10000 *
10001 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10002 */
10003
10004 void
xmlParseContent(xmlParserCtxtPtr ctxt)10005 xmlParseContent(xmlParserCtxtPtr ctxt) {
10006 GROW;
10007 while ((RAW != 0) &&
10008 ((RAW != '<') || (NXT(1) != '/')) &&
10009 (ctxt->instate != XML_PARSER_EOF)) {
10010 const xmlChar *test = CUR_PTR;
10011 unsigned int cons = ctxt->input->consumed;
10012 const xmlChar *cur = ctxt->input->cur;
10013
10014 /*
10015 * First case : a Processing Instruction.
10016 */
10017 if ((*cur == '<') && (cur[1] == '?')) {
10018 xmlParsePI(ctxt);
10019 }
10020
10021 /*
10022 * Second case : a CDSection
10023 */
10024 /* 2.6.0 test was *cur not RAW */
10025 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10026 xmlParseCDSect(ctxt);
10027 }
10028
10029 /*
10030 * Third case : a comment
10031 */
10032 else if ((*cur == '<') && (NXT(1) == '!') &&
10033 (NXT(2) == '-') && (NXT(3) == '-')) {
10034 xmlParseComment(ctxt);
10035 ctxt->instate = XML_PARSER_CONTENT;
10036 }
10037
10038 /*
10039 * Fourth case : a sub-element.
10040 */
10041 else if (*cur == '<') {
10042 xmlParseElement(ctxt);
10043 }
10044
10045 /*
10046 * Fifth case : a reference. If if has not been resolved,
10047 * parsing returns it's Name, create the node
10048 */
10049
10050 else if (*cur == '&') {
10051 xmlParseReference(ctxt);
10052 }
10053
10054 /*
10055 * Last case, text. Note that References are handled directly.
10056 */
10057 else {
10058 xmlParseCharData(ctxt, 0);
10059 }
10060
10061 GROW;
10062 /*
10063 * Pop-up of finished entities.
10064 */
10065 while ((RAW == 0) && (ctxt->inputNr > 1))
10066 xmlPopInput(ctxt);
10067 SHRINK;
10068
10069 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10071 "detected an error in element content\n");
10072 xmlHaltParser(ctxt);
10073 break;
10074 }
10075 }
10076 }
10077
10078 /**
10079 * xmlParseElement:
10080 * @ctxt: an XML parser context
10081 *
10082 * parse an XML element, this is highly recursive
10083 *
10084 * [39] element ::= EmptyElemTag | STag content ETag
10085 *
10086 * [ WFC: Element Type Match ]
10087 * The Name in an element's end-tag must match the element type in the
10088 * start-tag.
10089 *
10090 */
10091
10092 void
xmlParseElement(xmlParserCtxtPtr ctxt)10093 xmlParseElement(xmlParserCtxtPtr ctxt) {
10094 const xmlChar *name;
10095 const xmlChar *prefix = NULL;
10096 const xmlChar *URI = NULL;
10097 xmlParserNodeInfo node_info;
10098 int line, tlen = 0;
10099 xmlNodePtr ret;
10100 int nsNr = ctxt->nsNr;
10101
10102 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10103 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10104 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10105 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10106 xmlParserMaxDepth);
10107 xmlHaltParser(ctxt);
10108 return;
10109 }
10110
10111 /* Capture start position */
10112 if (ctxt->record_info) {
10113 node_info.begin_pos = ctxt->input->consumed +
10114 (CUR_PTR - ctxt->input->base);
10115 node_info.begin_line = ctxt->input->line;
10116 }
10117
10118 if (ctxt->spaceNr == 0)
10119 spacePush(ctxt, -1);
10120 else if (*ctxt->space == -2)
10121 spacePush(ctxt, -1);
10122 else
10123 spacePush(ctxt, *ctxt->space);
10124
10125 line = ctxt->input->line;
10126 #ifdef LIBXML_SAX1_ENABLED
10127 if (ctxt->sax2)
10128 #endif /* LIBXML_SAX1_ENABLED */
10129 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10130 #ifdef LIBXML_SAX1_ENABLED
10131 else
10132 name = xmlParseStartTag(ctxt);
10133 #endif /* LIBXML_SAX1_ENABLED */
10134 if (ctxt->instate == XML_PARSER_EOF)
10135 return;
10136 if (name == NULL) {
10137 spacePop(ctxt);
10138 return;
10139 }
10140 namePush(ctxt, name);
10141 ret = ctxt->node;
10142
10143 #ifdef LIBXML_VALID_ENABLED
10144 /*
10145 * [ VC: Root Element Type ]
10146 * The Name in the document type declaration must match the element
10147 * type of the root element.
10148 */
10149 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10150 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10151 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10152 #endif /* LIBXML_VALID_ENABLED */
10153
10154 /*
10155 * Check for an Empty Element.
10156 */
10157 if ((RAW == '/') && (NXT(1) == '>')) {
10158 SKIP(2);
10159 if (ctxt->sax2) {
10160 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10161 (!ctxt->disableSAX))
10162 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10163 #ifdef LIBXML_SAX1_ENABLED
10164 } else {
10165 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10166 (!ctxt->disableSAX))
10167 ctxt->sax->endElement(ctxt->userData, name);
10168 #endif /* LIBXML_SAX1_ENABLED */
10169 }
10170 namePop(ctxt);
10171 spacePop(ctxt);
10172 if (nsNr != ctxt->nsNr)
10173 nsPop(ctxt, ctxt->nsNr - nsNr);
10174 if ( ret != NULL && ctxt->record_info ) {
10175 node_info.end_pos = ctxt->input->consumed +
10176 (CUR_PTR - ctxt->input->base);
10177 node_info.end_line = ctxt->input->line;
10178 node_info.node = ret;
10179 xmlParserAddNodeInfo(ctxt, &node_info);
10180 }
10181 return;
10182 }
10183 if (RAW == '>') {
10184 NEXT1;
10185 } else {
10186 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10187 "Couldn't find end of Start Tag %s line %d\n",
10188 name, line, NULL);
10189
10190 /*
10191 * end of parsing of this node.
10192 */
10193 nodePop(ctxt);
10194 namePop(ctxt);
10195 spacePop(ctxt);
10196 if (nsNr != ctxt->nsNr)
10197 nsPop(ctxt, ctxt->nsNr - nsNr);
10198
10199 /*
10200 * Capture end position and add node
10201 */
10202 if ( ret != NULL && ctxt->record_info ) {
10203 node_info.end_pos = ctxt->input->consumed +
10204 (CUR_PTR - ctxt->input->base);
10205 node_info.end_line = ctxt->input->line;
10206 node_info.node = ret;
10207 xmlParserAddNodeInfo(ctxt, &node_info);
10208 }
10209 return;
10210 }
10211
10212 /*
10213 * Parse the content of the element:
10214 */
10215 xmlParseContent(ctxt);
10216 if (ctxt->instate == XML_PARSER_EOF)
10217 return;
10218 if (!IS_BYTE_CHAR(RAW)) {
10219 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10220 "Premature end of data in tag %s line %d\n",
10221 name, line, NULL);
10222
10223 /*
10224 * end of parsing of this node.
10225 */
10226 nodePop(ctxt);
10227 namePop(ctxt);
10228 spacePop(ctxt);
10229 if (nsNr != ctxt->nsNr)
10230 nsPop(ctxt, ctxt->nsNr - nsNr);
10231 return;
10232 }
10233
10234 /*
10235 * parse the end of tag: '</' should be here.
10236 */
10237 if (ctxt->sax2) {
10238 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10239 namePop(ctxt);
10240 }
10241 #ifdef LIBXML_SAX1_ENABLED
10242 else
10243 xmlParseEndTag1(ctxt, line);
10244 #endif /* LIBXML_SAX1_ENABLED */
10245
10246 /*
10247 * Capture end position and add node
10248 */
10249 if ( ret != NULL && ctxt->record_info ) {
10250 node_info.end_pos = ctxt->input->consumed +
10251 (CUR_PTR - ctxt->input->base);
10252 node_info.end_line = ctxt->input->line;
10253 node_info.node = ret;
10254 xmlParserAddNodeInfo(ctxt, &node_info);
10255 }
10256 }
10257
10258 /**
10259 * xmlParseVersionNum:
10260 * @ctxt: an XML parser context
10261 *
10262 * parse the XML version value.
10263 *
10264 * [26] VersionNum ::= '1.' [0-9]+
10265 *
10266 * In practice allow [0-9].[0-9]+ at that level
10267 *
10268 * Returns the string giving the XML version number, or NULL
10269 */
10270 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10271 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10272 xmlChar *buf = NULL;
10273 int len = 0;
10274 int size = 10;
10275 xmlChar cur;
10276
10277 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10278 if (buf == NULL) {
10279 xmlErrMemory(ctxt, NULL);
10280 return(NULL);
10281 }
10282 cur = CUR;
10283 if (!((cur >= '0') && (cur <= '9'))) {
10284 xmlFree(buf);
10285 return(NULL);
10286 }
10287 buf[len++] = cur;
10288 NEXT;
10289 cur=CUR;
10290 if (cur != '.') {
10291 xmlFree(buf);
10292 return(NULL);
10293 }
10294 buf[len++] = cur;
10295 NEXT;
10296 cur=CUR;
10297 while ((cur >= '0') && (cur <= '9')) {
10298 if (len + 1 >= size) {
10299 xmlChar *tmp;
10300
10301 size *= 2;
10302 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10303 if (tmp == NULL) {
10304 xmlFree(buf);
10305 xmlErrMemory(ctxt, NULL);
10306 return(NULL);
10307 }
10308 buf = tmp;
10309 }
10310 buf[len++] = cur;
10311 NEXT;
10312 cur=CUR;
10313 }
10314 buf[len] = 0;
10315 return(buf);
10316 }
10317
10318 /**
10319 * xmlParseVersionInfo:
10320 * @ctxt: an XML parser context
10321 *
10322 * parse the XML version.
10323 *
10324 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10325 *
10326 * [25] Eq ::= S? '=' S?
10327 *
10328 * Returns the version string, e.g. "1.0"
10329 */
10330
10331 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10332 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10333 xmlChar *version = NULL;
10334
10335 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10336 SKIP(7);
10337 SKIP_BLANKS;
10338 if (RAW != '=') {
10339 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10340 return(NULL);
10341 }
10342 NEXT;
10343 SKIP_BLANKS;
10344 if (RAW == '"') {
10345 NEXT;
10346 version = xmlParseVersionNum(ctxt);
10347 if (RAW != '"') {
10348 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10349 } else
10350 NEXT;
10351 } else if (RAW == '\''){
10352 NEXT;
10353 version = xmlParseVersionNum(ctxt);
10354 if (RAW != '\'') {
10355 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10356 } else
10357 NEXT;
10358 } else {
10359 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10360 }
10361 }
10362 return(version);
10363 }
10364
10365 /**
10366 * xmlParseEncName:
10367 * @ctxt: an XML parser context
10368 *
10369 * parse the XML encoding name
10370 *
10371 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10372 *
10373 * Returns the encoding name value or NULL
10374 */
10375 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10376 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10377 xmlChar *buf = NULL;
10378 int len = 0;
10379 int size = 10;
10380 xmlChar cur;
10381
10382 cur = CUR;
10383 if (((cur >= 'a') && (cur <= 'z')) ||
10384 ((cur >= 'A') && (cur <= 'Z'))) {
10385 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10386 if (buf == NULL) {
10387 xmlErrMemory(ctxt, NULL);
10388 return(NULL);
10389 }
10390
10391 buf[len++] = cur;
10392 NEXT;
10393 cur = CUR;
10394 while (((cur >= 'a') && (cur <= 'z')) ||
10395 ((cur >= 'A') && (cur <= 'Z')) ||
10396 ((cur >= '0') && (cur <= '9')) ||
10397 (cur == '.') || (cur == '_') ||
10398 (cur == '-')) {
10399 if (len + 1 >= size) {
10400 xmlChar *tmp;
10401
10402 size *= 2;
10403 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10404 if (tmp == NULL) {
10405 xmlErrMemory(ctxt, NULL);
10406 xmlFree(buf);
10407 return(NULL);
10408 }
10409 buf = tmp;
10410 }
10411 buf[len++] = cur;
10412 NEXT;
10413 cur = CUR;
10414 if (cur == 0) {
10415 SHRINK;
10416 GROW;
10417 cur = CUR;
10418 }
10419 }
10420 buf[len] = 0;
10421 } else {
10422 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10423 }
10424 return(buf);
10425 }
10426
10427 /**
10428 * xmlParseEncodingDecl:
10429 * @ctxt: an XML parser context
10430 *
10431 * parse the XML encoding declaration
10432 *
10433 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10434 *
10435 * this setups the conversion filters.
10436 *
10437 * Returns the encoding value or NULL
10438 */
10439
10440 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10441 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10442 xmlChar *encoding = NULL;
10443
10444 SKIP_BLANKS;
10445 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10446 SKIP(8);
10447 SKIP_BLANKS;
10448 if (RAW != '=') {
10449 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10450 return(NULL);
10451 }
10452 NEXT;
10453 SKIP_BLANKS;
10454 if (RAW == '"') {
10455 NEXT;
10456 encoding = xmlParseEncName(ctxt);
10457 if (RAW != '"') {
10458 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10459 xmlFree((xmlChar *) encoding);
10460 return(NULL);
10461 } else
10462 NEXT;
10463 } else if (RAW == '\''){
10464 NEXT;
10465 encoding = xmlParseEncName(ctxt);
10466 if (RAW != '\'') {
10467 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10468 xmlFree((xmlChar *) encoding);
10469 return(NULL);
10470 } else
10471 NEXT;
10472 } else {
10473 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10474 }
10475
10476 /*
10477 * Non standard parsing, allowing the user to ignore encoding
10478 */
10479 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10480 xmlFree((xmlChar *) encoding);
10481 return(NULL);
10482 }
10483
10484 /*
10485 * UTF-16 encoding stwich has already taken place at this stage,
10486 * more over the little-endian/big-endian selection is already done
10487 */
10488 if ((encoding != NULL) &&
10489 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10490 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10491 /*
10492 * If no encoding was passed to the parser, that we are
10493 * using UTF-16 and no decoder is present i.e. the
10494 * document is apparently UTF-8 compatible, then raise an
10495 * encoding mismatch fatal error
10496 */
10497 if ((ctxt->encoding == NULL) &&
10498 (ctxt->input->buf != NULL) &&
10499 (ctxt->input->buf->encoder == NULL)) {
10500 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10501 "Document labelled UTF-16 but has UTF-8 content\n");
10502 }
10503 if (ctxt->encoding != NULL)
10504 xmlFree((xmlChar *) ctxt->encoding);
10505 ctxt->encoding = encoding;
10506 }
10507 /*
10508 * UTF-8 encoding is handled natively
10509 */
10510 else if ((encoding != NULL) &&
10511 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10512 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10513 if (ctxt->encoding != NULL)
10514 xmlFree((xmlChar *) ctxt->encoding);
10515 ctxt->encoding = encoding;
10516 }
10517 else if (encoding != NULL) {
10518 xmlCharEncodingHandlerPtr handler;
10519
10520 if (ctxt->input->encoding != NULL)
10521 xmlFree((xmlChar *) ctxt->input->encoding);
10522 ctxt->input->encoding = encoding;
10523
10524 handler = xmlFindCharEncodingHandler((const char *) encoding);
10525 if (handler != NULL) {
10526 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10527 /* failed to convert */
10528 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10529 return(NULL);
10530 }
10531 } else {
10532 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10533 "Unsupported encoding %s\n", encoding);
10534 return(NULL);
10535 }
10536 }
10537 }
10538 return(encoding);
10539 }
10540
10541 /**
10542 * xmlParseSDDecl:
10543 * @ctxt: an XML parser context
10544 *
10545 * parse the XML standalone declaration
10546 *
10547 * [32] SDDecl ::= S 'standalone' Eq
10548 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10549 *
10550 * [ VC: Standalone Document Declaration ]
10551 * TODO The standalone document declaration must have the value "no"
10552 * if any external markup declarations contain declarations of:
10553 * - attributes with default values, if elements to which these
10554 * attributes apply appear in the document without specifications
10555 * of values for these attributes, or
10556 * - entities (other than amp, lt, gt, apos, quot), if references
10557 * to those entities appear in the document, or
10558 * - attributes with values subject to normalization, where the
10559 * attribute appears in the document with a value which will change
10560 * as a result of normalization, or
10561 * - element types with element content, if white space occurs directly
10562 * within any instance of those types.
10563 *
10564 * Returns:
10565 * 1 if standalone="yes"
10566 * 0 if standalone="no"
10567 * -2 if standalone attribute is missing or invalid
10568 * (A standalone value of -2 means that the XML declaration was found,
10569 * but no value was specified for the standalone attribute).
10570 */
10571
10572 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10573 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10574 int standalone = -2;
10575
10576 SKIP_BLANKS;
10577 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10578 SKIP(10);
10579 SKIP_BLANKS;
10580 if (RAW != '=') {
10581 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10582 return(standalone);
10583 }
10584 NEXT;
10585 SKIP_BLANKS;
10586 if (RAW == '\''){
10587 NEXT;
10588 if ((RAW == 'n') && (NXT(1) == 'o')) {
10589 standalone = 0;
10590 SKIP(2);
10591 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10592 (NXT(2) == 's')) {
10593 standalone = 1;
10594 SKIP(3);
10595 } else {
10596 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10597 }
10598 if (RAW != '\'') {
10599 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10600 } else
10601 NEXT;
10602 } else if (RAW == '"'){
10603 NEXT;
10604 if ((RAW == 'n') && (NXT(1) == 'o')) {
10605 standalone = 0;
10606 SKIP(2);
10607 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10608 (NXT(2) == 's')) {
10609 standalone = 1;
10610 SKIP(3);
10611 } else {
10612 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10613 }
10614 if (RAW != '"') {
10615 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10616 } else
10617 NEXT;
10618 } else {
10619 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10620 }
10621 }
10622 return(standalone);
10623 }
10624
10625 /**
10626 * xmlParseXMLDecl:
10627 * @ctxt: an XML parser context
10628 *
10629 * parse an XML declaration header
10630 *
10631 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10632 */
10633
10634 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10635 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10636 xmlChar *version;
10637
10638 /*
10639 * This value for standalone indicates that the document has an
10640 * XML declaration but it does not have a standalone attribute.
10641 * It will be overwritten later if a standalone attribute is found.
10642 */
10643 ctxt->input->standalone = -2;
10644
10645 /*
10646 * We know that '<?xml' is here.
10647 */
10648 SKIP(5);
10649
10650 if (!IS_BLANK_CH(RAW)) {
10651 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10652 "Blank needed after '<?xml'\n");
10653 }
10654 SKIP_BLANKS;
10655
10656 /*
10657 * We must have the VersionInfo here.
10658 */
10659 version = xmlParseVersionInfo(ctxt);
10660 if (version == NULL) {
10661 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10662 } else {
10663 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10664 /*
10665 * Changed here for XML-1.0 5th edition
10666 */
10667 if (ctxt->options & XML_PARSE_OLD10) {
10668 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10669 "Unsupported version '%s'\n",
10670 version);
10671 } else {
10672 if ((version[0] == '1') && ((version[1] == '.'))) {
10673 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10674 "Unsupported version '%s'\n",
10675 version, NULL);
10676 } else {
10677 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10678 "Unsupported version '%s'\n",
10679 version);
10680 }
10681 }
10682 }
10683 if (ctxt->version != NULL)
10684 xmlFree((void *) ctxt->version);
10685 ctxt->version = version;
10686 }
10687
10688 /*
10689 * We may have the encoding declaration
10690 */
10691 if (!IS_BLANK_CH(RAW)) {
10692 if ((RAW == '?') && (NXT(1) == '>')) {
10693 SKIP(2);
10694 return;
10695 }
10696 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10697 }
10698 xmlParseEncodingDecl(ctxt);
10699 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10700 (ctxt->instate == XML_PARSER_EOF)) {
10701 /*
10702 * The XML REC instructs us to stop parsing right here
10703 */
10704 return;
10705 }
10706
10707 /*
10708 * We may have the standalone status.
10709 */
10710 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10711 if ((RAW == '?') && (NXT(1) == '>')) {
10712 SKIP(2);
10713 return;
10714 }
10715 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10716 }
10717
10718 /*
10719 * We can grow the input buffer freely at that point
10720 */
10721 GROW;
10722
10723 SKIP_BLANKS;
10724 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10725
10726 SKIP_BLANKS;
10727 if ((RAW == '?') && (NXT(1) == '>')) {
10728 SKIP(2);
10729 } else if (RAW == '>') {
10730 /* Deprecated old WD ... */
10731 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10732 NEXT;
10733 } else {
10734 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10735 MOVETO_ENDTAG(CUR_PTR);
10736 NEXT;
10737 }
10738 }
10739
10740 /**
10741 * xmlParseMisc:
10742 * @ctxt: an XML parser context
10743 *
10744 * parse an XML Misc* optional field.
10745 *
10746 * [27] Misc ::= Comment | PI | S
10747 */
10748
10749 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10750 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10751 while ((ctxt->instate != XML_PARSER_EOF) &&
10752 (((RAW == '<') && (NXT(1) == '?')) ||
10753 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10754 IS_BLANK_CH(CUR))) {
10755 if ((RAW == '<') && (NXT(1) == '?')) {
10756 xmlParsePI(ctxt);
10757 } else if (IS_BLANK_CH(CUR)) {
10758 NEXT;
10759 } else
10760 xmlParseComment(ctxt);
10761 }
10762 }
10763
10764 /**
10765 * xmlParseDocument:
10766 * @ctxt: an XML parser context
10767 *
10768 * parse an XML document (and build a tree if using the standard SAX
10769 * interface).
10770 *
10771 * [1] document ::= prolog element Misc*
10772 *
10773 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10774 *
10775 * Returns 0, -1 in case of error. the parser context is augmented
10776 * as a result of the parsing.
10777 */
10778
10779 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10780 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10781 xmlChar start[4];
10782 xmlCharEncoding enc;
10783
10784 xmlInitParser();
10785
10786 if ((ctxt == NULL) || (ctxt->input == NULL))
10787 return(-1);
10788
10789 GROW;
10790
10791 /*
10792 * SAX: detecting the level.
10793 */
10794 xmlDetectSAX2(ctxt);
10795
10796 /*
10797 * SAX: beginning of the document processing.
10798 */
10799 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10800 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10801 if (ctxt->instate == XML_PARSER_EOF)
10802 return(-1);
10803
10804 if ((ctxt->encoding == NULL) &&
10805 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10806 /*
10807 * Get the 4 first bytes and decode the charset
10808 * if enc != XML_CHAR_ENCODING_NONE
10809 * plug some encoding conversion routines.
10810 */
10811 start[0] = RAW;
10812 start[1] = NXT(1);
10813 start[2] = NXT(2);
10814 start[3] = NXT(3);
10815 enc = xmlDetectCharEncoding(&start[0], 4);
10816 if (enc != XML_CHAR_ENCODING_NONE) {
10817 xmlSwitchEncoding(ctxt, enc);
10818 }
10819 }
10820
10821
10822 if (CUR == 0) {
10823 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10824 return(-1);
10825 }
10826
10827 /*
10828 * Check for the XMLDecl in the Prolog.
10829 * do not GROW here to avoid the detected encoder to decode more
10830 * than just the first line, unless the amount of data is really
10831 * too small to hold "<?xml version="1.0" encoding="foo"
10832 */
10833 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10834 GROW;
10835 }
10836 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10837
10838 /*
10839 * Note that we will switch encoding on the fly.
10840 */
10841 xmlParseXMLDecl(ctxt);
10842 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10843 (ctxt->instate == XML_PARSER_EOF)) {
10844 /*
10845 * The XML REC instructs us to stop parsing right here
10846 */
10847 return(-1);
10848 }
10849 ctxt->standalone = ctxt->input->standalone;
10850 SKIP_BLANKS;
10851 } else {
10852 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10853 }
10854 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10855 ctxt->sax->startDocument(ctxt->userData);
10856 if (ctxt->instate == XML_PARSER_EOF)
10857 return(-1);
10858 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10859 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10860 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10861 }
10862
10863 /*
10864 * The Misc part of the Prolog
10865 */
10866 GROW;
10867 xmlParseMisc(ctxt);
10868
10869 /*
10870 * Then possibly doc type declaration(s) and more Misc
10871 * (doctypedecl Misc*)?
10872 */
10873 GROW;
10874 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10875
10876 ctxt->inSubset = 1;
10877 xmlParseDocTypeDecl(ctxt);
10878 if (RAW == '[') {
10879 ctxt->instate = XML_PARSER_DTD;
10880 xmlParseInternalSubset(ctxt);
10881 if (ctxt->instate == XML_PARSER_EOF)
10882 return(-1);
10883 }
10884
10885 /*
10886 * Create and update the external subset.
10887 */
10888 ctxt->inSubset = 2;
10889 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10890 (!ctxt->disableSAX))
10891 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10892 ctxt->extSubSystem, ctxt->extSubURI);
10893 if (ctxt->instate == XML_PARSER_EOF)
10894 return(-1);
10895 ctxt->inSubset = 0;
10896
10897 xmlCleanSpecialAttr(ctxt);
10898
10899 ctxt->instate = XML_PARSER_PROLOG;
10900 xmlParseMisc(ctxt);
10901 }
10902
10903 /*
10904 * Time to start parsing the tree itself
10905 */
10906 GROW;
10907 if (RAW != '<') {
10908 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10909 "Start tag expected, '<' not found\n");
10910 } else {
10911 ctxt->instate = XML_PARSER_CONTENT;
10912 xmlParseElement(ctxt);
10913 ctxt->instate = XML_PARSER_EPILOG;
10914
10915
10916 /*
10917 * The Misc part at the end
10918 */
10919 xmlParseMisc(ctxt);
10920
10921 if (RAW != 0) {
10922 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10923 }
10924 ctxt->instate = XML_PARSER_EOF;
10925 }
10926
10927 /*
10928 * SAX: end of the document processing.
10929 */
10930 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10931 ctxt->sax->endDocument(ctxt->userData);
10932
10933 /*
10934 * Remove locally kept entity definitions if the tree was not built
10935 */
10936 if ((ctxt->myDoc != NULL) &&
10937 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10938 xmlFreeDoc(ctxt->myDoc);
10939 ctxt->myDoc = NULL;
10940 }
10941
10942 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10943 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10944 if (ctxt->valid)
10945 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10946 if (ctxt->nsWellFormed)
10947 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10948 if (ctxt->options & XML_PARSE_OLD10)
10949 ctxt->myDoc->properties |= XML_DOC_OLD10;
10950 }
10951 if (! ctxt->wellFormed) {
10952 ctxt->valid = 0;
10953 return(-1);
10954 }
10955 return(0);
10956 }
10957
10958 /**
10959 * xmlParseExtParsedEnt:
10960 * @ctxt: an XML parser context
10961 *
10962 * parse a general parsed entity
10963 * An external general parsed entity is well-formed if it matches the
10964 * production labeled extParsedEnt.
10965 *
10966 * [78] extParsedEnt ::= TextDecl? content
10967 *
10968 * Returns 0, -1 in case of error. the parser context is augmented
10969 * as a result of the parsing.
10970 */
10971
10972 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10973 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10974 xmlChar start[4];
10975 xmlCharEncoding enc;
10976
10977 if ((ctxt == NULL) || (ctxt->input == NULL))
10978 return(-1);
10979
10980 xmlDefaultSAXHandlerInit();
10981
10982 xmlDetectSAX2(ctxt);
10983
10984 GROW;
10985
10986 /*
10987 * SAX: beginning of the document processing.
10988 */
10989 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10990 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10991
10992 /*
10993 * Get the 4 first bytes and decode the charset
10994 * if enc != XML_CHAR_ENCODING_NONE
10995 * plug some encoding conversion routines.
10996 */
10997 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10998 start[0] = RAW;
10999 start[1] = NXT(1);
11000 start[2] = NXT(2);
11001 start[3] = NXT(3);
11002 enc = xmlDetectCharEncoding(start, 4);
11003 if (enc != XML_CHAR_ENCODING_NONE) {
11004 xmlSwitchEncoding(ctxt, enc);
11005 }
11006 }
11007
11008
11009 if (CUR == 0) {
11010 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11011 }
11012
11013 /*
11014 * Check for the XMLDecl in the Prolog.
11015 */
11016 GROW;
11017 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11018
11019 /*
11020 * Note that we will switch encoding on the fly.
11021 */
11022 xmlParseXMLDecl(ctxt);
11023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11024 /*
11025 * The XML REC instructs us to stop parsing right here
11026 */
11027 return(-1);
11028 }
11029 SKIP_BLANKS;
11030 } else {
11031 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11032 }
11033 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11034 ctxt->sax->startDocument(ctxt->userData);
11035 if (ctxt->instate == XML_PARSER_EOF)
11036 return(-1);
11037
11038 /*
11039 * Doing validity checking on chunk doesn't make sense
11040 */
11041 ctxt->instate = XML_PARSER_CONTENT;
11042 ctxt->validate = 0;
11043 ctxt->loadsubset = 0;
11044 ctxt->depth = 0;
11045
11046 xmlParseContent(ctxt);
11047 if (ctxt->instate == XML_PARSER_EOF)
11048 return(-1);
11049
11050 if ((RAW == '<') && (NXT(1) == '/')) {
11051 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11052 } else if (RAW != 0) {
11053 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11054 }
11055
11056 /*
11057 * SAX: end of the document processing.
11058 */
11059 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11060 ctxt->sax->endDocument(ctxt->userData);
11061
11062 if (! ctxt->wellFormed) return(-1);
11063 return(0);
11064 }
11065
11066 #ifdef LIBXML_PUSH_ENABLED
11067 /************************************************************************
11068 * *
11069 * Progressive parsing interfaces *
11070 * *
11071 ************************************************************************/
11072
11073 /**
11074 * xmlParseLookupSequence:
11075 * @ctxt: an XML parser context
11076 * @first: the first char to lookup
11077 * @next: the next char to lookup or zero
11078 * @third: the next char to lookup or zero
11079 *
11080 * Try to find if a sequence (first, next, third) or just (first next) or
11081 * (first) is available in the input stream.
11082 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11083 * to avoid rescanning sequences of bytes, it DOES change the state of the
11084 * parser, do not use liberally.
11085 *
11086 * Returns the index to the current parsing point if the full sequence
11087 * is available, -1 otherwise.
11088 */
11089 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11090 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11091 xmlChar next, xmlChar third) {
11092 int base, len;
11093 xmlParserInputPtr in;
11094 const xmlChar *buf;
11095
11096 in = ctxt->input;
11097 if (in == NULL) return(-1);
11098 base = in->cur - in->base;
11099 if (base < 0) return(-1);
11100 if (ctxt->checkIndex > base)
11101 base = ctxt->checkIndex;
11102 if (in->buf == NULL) {
11103 buf = in->base;
11104 len = in->length;
11105 } else {
11106 buf = xmlBufContent(in->buf->buffer);
11107 len = xmlBufUse(in->buf->buffer);
11108 }
11109 /* take into account the sequence length */
11110 if (third) len -= 2;
11111 else if (next) len --;
11112 for (;base < len;base++) {
11113 if (buf[base] == first) {
11114 if (third != 0) {
11115 if ((buf[base + 1] != next) ||
11116 (buf[base + 2] != third)) continue;
11117 } else if (next != 0) {
11118 if (buf[base + 1] != next) continue;
11119 }
11120 ctxt->checkIndex = 0;
11121 #ifdef DEBUG_PUSH
11122 if (next == 0)
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: lookup '%c' found at %d\n",
11125 first, base);
11126 else if (third == 0)
11127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: lookup '%c%c' found at %d\n",
11129 first, next, base);
11130 else
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: lookup '%c%c%c' found at %d\n",
11133 first, next, third, base);
11134 #endif
11135 return(base - (in->cur - in->base));
11136 }
11137 }
11138 ctxt->checkIndex = base;
11139 #ifdef DEBUG_PUSH
11140 if (next == 0)
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: lookup '%c' failed\n", first);
11143 else if (third == 0)
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: lookup '%c%c' failed\n", first, next);
11146 else
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: lookup '%c%c%c' failed\n", first, next, third);
11149 #endif
11150 return(-1);
11151 }
11152
11153 /**
11154 * xmlParseGetLasts:
11155 * @ctxt: an XML parser context
11156 * @lastlt: pointer to store the last '<' from the input
11157 * @lastgt: pointer to store the last '>' from the input
11158 *
11159 * Lookup the last < and > in the current chunk
11160 */
11161 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11162 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11163 const xmlChar **lastgt) {
11164 const xmlChar *tmp;
11165
11166 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11167 xmlGenericError(xmlGenericErrorContext,
11168 "Internal error: xmlParseGetLasts\n");
11169 return;
11170 }
11171 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11172 tmp = ctxt->input->end;
11173 tmp--;
11174 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11175 if (tmp < ctxt->input->base) {
11176 *lastlt = NULL;
11177 *lastgt = NULL;
11178 } else {
11179 *lastlt = tmp;
11180 tmp++;
11181 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11182 if (*tmp == '\'') {
11183 tmp++;
11184 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11185 if (tmp < ctxt->input->end) tmp++;
11186 } else if (*tmp == '"') {
11187 tmp++;
11188 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11189 if (tmp < ctxt->input->end) tmp++;
11190 } else
11191 tmp++;
11192 }
11193 if (tmp < ctxt->input->end)
11194 *lastgt = tmp;
11195 else {
11196 tmp = *lastlt;
11197 tmp--;
11198 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11199 if (tmp >= ctxt->input->base)
11200 *lastgt = tmp;
11201 else
11202 *lastgt = NULL;
11203 }
11204 }
11205 } else {
11206 *lastlt = NULL;
11207 *lastgt = NULL;
11208 }
11209 }
11210 /**
11211 * xmlCheckCdataPush:
11212 * @cur: pointer to the bock of characters
11213 * @len: length of the block in bytes
11214 *
11215 * Check that the block of characters is okay as SCdata content [20]
11216 *
11217 * Returns the number of bytes to pass if okay, a negative index where an
11218 * UTF-8 error occured otherwise
11219 */
11220 static int
xmlCheckCdataPush(const xmlChar * utf,int len)11221 xmlCheckCdataPush(const xmlChar *utf, int len) {
11222 int ix;
11223 unsigned char c;
11224 int codepoint;
11225
11226 if ((utf == NULL) || (len <= 0))
11227 return(0);
11228
11229 for (ix = 0; ix < len;) { /* string is 0-terminated */
11230 c = utf[ix];
11231 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11232 if (c >= 0x20)
11233 ix++;
11234 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11235 ix++;
11236 else
11237 return(-ix);
11238 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11239 if (ix + 2 > len) return(-ix);
11240 if ((utf[ix+1] & 0xc0 ) != 0x80)
11241 return(-ix);
11242 codepoint = (utf[ix] & 0x1f) << 6;
11243 codepoint |= utf[ix+1] & 0x3f;
11244 if (!xmlIsCharQ(codepoint))
11245 return(-ix);
11246 ix += 2;
11247 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11248 if (ix + 3 > len) return(-ix);
11249 if (((utf[ix+1] & 0xc0) != 0x80) ||
11250 ((utf[ix+2] & 0xc0) != 0x80))
11251 return(-ix);
11252 codepoint = (utf[ix] & 0xf) << 12;
11253 codepoint |= (utf[ix+1] & 0x3f) << 6;
11254 codepoint |= utf[ix+2] & 0x3f;
11255 if (!xmlIsCharQ(codepoint))
11256 return(-ix);
11257 ix += 3;
11258 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11259 if (ix + 4 > len) return(-ix);
11260 if (((utf[ix+1] & 0xc0) != 0x80) ||
11261 ((utf[ix+2] & 0xc0) != 0x80) ||
11262 ((utf[ix+3] & 0xc0) != 0x80))
11263 return(-ix);
11264 codepoint = (utf[ix] & 0x7) << 18;
11265 codepoint |= (utf[ix+1] & 0x3f) << 12;
11266 codepoint |= (utf[ix+2] & 0x3f) << 6;
11267 codepoint |= utf[ix+3] & 0x3f;
11268 if (!xmlIsCharQ(codepoint))
11269 return(-ix);
11270 ix += 4;
11271 } else /* unknown encoding */
11272 return(-ix);
11273 }
11274 return(ix);
11275 }
11276
11277 /**
11278 * xmlParseTryOrFinish:
11279 * @ctxt: an XML parser context
11280 * @terminate: last chunk indicator
11281 *
11282 * Try to progress on parsing
11283 *
11284 * Returns zero if no parsing was possible
11285 */
11286 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11287 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11288 int ret = 0;
11289 int avail, tlen;
11290 xmlChar cur, next;
11291 const xmlChar *lastlt, *lastgt;
11292
11293 if (ctxt->input == NULL)
11294 return(0);
11295
11296 #ifdef DEBUG_PUSH
11297 switch (ctxt->instate) {
11298 case XML_PARSER_EOF:
11299 xmlGenericError(xmlGenericErrorContext,
11300 "PP: try EOF\n"); break;
11301 case XML_PARSER_START:
11302 xmlGenericError(xmlGenericErrorContext,
11303 "PP: try START\n"); break;
11304 case XML_PARSER_MISC:
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: try MISC\n");break;
11307 case XML_PARSER_COMMENT:
11308 xmlGenericError(xmlGenericErrorContext,
11309 "PP: try COMMENT\n");break;
11310 case XML_PARSER_PROLOG:
11311 xmlGenericError(xmlGenericErrorContext,
11312 "PP: try PROLOG\n");break;
11313 case XML_PARSER_START_TAG:
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: try START_TAG\n");break;
11316 case XML_PARSER_CONTENT:
11317 xmlGenericError(xmlGenericErrorContext,
11318 "PP: try CONTENT\n");break;
11319 case XML_PARSER_CDATA_SECTION:
11320 xmlGenericError(xmlGenericErrorContext,
11321 "PP: try CDATA_SECTION\n");break;
11322 case XML_PARSER_END_TAG:
11323 xmlGenericError(xmlGenericErrorContext,
11324 "PP: try END_TAG\n");break;
11325 case XML_PARSER_ENTITY_DECL:
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: try ENTITY_DECL\n");break;
11328 case XML_PARSER_ENTITY_VALUE:
11329 xmlGenericError(xmlGenericErrorContext,
11330 "PP: try ENTITY_VALUE\n");break;
11331 case XML_PARSER_ATTRIBUTE_VALUE:
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: try ATTRIBUTE_VALUE\n");break;
11334 case XML_PARSER_DTD:
11335 xmlGenericError(xmlGenericErrorContext,
11336 "PP: try DTD\n");break;
11337 case XML_PARSER_EPILOG:
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: try EPILOG\n");break;
11340 case XML_PARSER_PI:
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: try PI\n");break;
11343 case XML_PARSER_IGNORE:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: try IGNORE\n");break;
11346 }
11347 #endif
11348
11349 if ((ctxt->input != NULL) &&
11350 (ctxt->input->cur - ctxt->input->base > 4096)) {
11351 xmlSHRINK(ctxt);
11352 ctxt->checkIndex = 0;
11353 }
11354 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11355
11356 while (ctxt->instate != XML_PARSER_EOF) {
11357 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11358 return(0);
11359
11360
11361 /*
11362 * Pop-up of finished entities.
11363 */
11364 while ((RAW == 0) && (ctxt->inputNr > 1))
11365 xmlPopInput(ctxt);
11366
11367 if (ctxt->input == NULL) break;
11368 if (ctxt->input->buf == NULL)
11369 avail = ctxt->input->length -
11370 (ctxt->input->cur - ctxt->input->base);
11371 else {
11372 /*
11373 * If we are operating on converted input, try to flush
11374 * remainng chars to avoid them stalling in the non-converted
11375 * buffer. But do not do this in document start where
11376 * encoding="..." may not have been read and we work on a
11377 * guessed encoding.
11378 */
11379 if ((ctxt->instate != XML_PARSER_START) &&
11380 (ctxt->input->buf->raw != NULL) &&
11381 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11382 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11383 ctxt->input);
11384 size_t current = ctxt->input->cur - ctxt->input->base;
11385
11386 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11387 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11388 base, current);
11389 }
11390 avail = xmlBufUse(ctxt->input->buf->buffer) -
11391 (ctxt->input->cur - ctxt->input->base);
11392 }
11393 if (avail < 1)
11394 goto done;
11395 switch (ctxt->instate) {
11396 case XML_PARSER_EOF:
11397 /*
11398 * Document parsing is done !
11399 */
11400 goto done;
11401 case XML_PARSER_START:
11402 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11403 xmlChar start[4];
11404 xmlCharEncoding enc;
11405
11406 /*
11407 * Very first chars read from the document flow.
11408 */
11409 if (avail < 4)
11410 goto done;
11411
11412 /*
11413 * Get the 4 first bytes and decode the charset
11414 * if enc != XML_CHAR_ENCODING_NONE
11415 * plug some encoding conversion routines,
11416 * else xmlSwitchEncoding will set to (default)
11417 * UTF8.
11418 */
11419 start[0] = RAW;
11420 start[1] = NXT(1);
11421 start[2] = NXT(2);
11422 start[3] = NXT(3);
11423 enc = xmlDetectCharEncoding(start, 4);
11424 xmlSwitchEncoding(ctxt, enc);
11425 break;
11426 }
11427
11428 if (avail < 2)
11429 goto done;
11430 cur = ctxt->input->cur[0];
11431 next = ctxt->input->cur[1];
11432 if (cur == 0) {
11433 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11434 ctxt->sax->setDocumentLocator(ctxt->userData,
11435 &xmlDefaultSAXLocator);
11436 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11437 xmlHaltParser(ctxt);
11438 #ifdef DEBUG_PUSH
11439 xmlGenericError(xmlGenericErrorContext,
11440 "PP: entering EOF\n");
11441 #endif
11442 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11443 ctxt->sax->endDocument(ctxt->userData);
11444 goto done;
11445 }
11446 if ((cur == '<') && (next == '?')) {
11447 /* PI or XML decl */
11448 if (avail < 5) return(ret);
11449 if ((!terminate) &&
11450 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11451 return(ret);
11452 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11453 ctxt->sax->setDocumentLocator(ctxt->userData,
11454 &xmlDefaultSAXLocator);
11455 if ((ctxt->input->cur[2] == 'x') &&
11456 (ctxt->input->cur[3] == 'm') &&
11457 (ctxt->input->cur[4] == 'l') &&
11458 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11459 ret += 5;
11460 #ifdef DEBUG_PUSH
11461 xmlGenericError(xmlGenericErrorContext,
11462 "PP: Parsing XML Decl\n");
11463 #endif
11464 xmlParseXMLDecl(ctxt);
11465 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11466 /*
11467 * The XML REC instructs us to stop parsing right
11468 * here
11469 */
11470 xmlHaltParser(ctxt);
11471 return(0);
11472 }
11473 ctxt->standalone = ctxt->input->standalone;
11474 if ((ctxt->encoding == NULL) &&
11475 (ctxt->input->encoding != NULL))
11476 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11477 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11478 (!ctxt->disableSAX))
11479 ctxt->sax->startDocument(ctxt->userData);
11480 ctxt->instate = XML_PARSER_MISC;
11481 #ifdef DEBUG_PUSH
11482 xmlGenericError(xmlGenericErrorContext,
11483 "PP: entering MISC\n");
11484 #endif
11485 } else {
11486 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11487 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11488 (!ctxt->disableSAX))
11489 ctxt->sax->startDocument(ctxt->userData);
11490 ctxt->instate = XML_PARSER_MISC;
11491 #ifdef DEBUG_PUSH
11492 xmlGenericError(xmlGenericErrorContext,
11493 "PP: entering MISC\n");
11494 #endif
11495 }
11496 } else {
11497 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11498 ctxt->sax->setDocumentLocator(ctxt->userData,
11499 &xmlDefaultSAXLocator);
11500 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11501 if (ctxt->version == NULL) {
11502 xmlErrMemory(ctxt, NULL);
11503 break;
11504 }
11505 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11506 (!ctxt->disableSAX))
11507 ctxt->sax->startDocument(ctxt->userData);
11508 ctxt->instate = XML_PARSER_MISC;
11509 #ifdef DEBUG_PUSH
11510 xmlGenericError(xmlGenericErrorContext,
11511 "PP: entering MISC\n");
11512 #endif
11513 }
11514 break;
11515 case XML_PARSER_START_TAG: {
11516 const xmlChar *name;
11517 const xmlChar *prefix = NULL;
11518 const xmlChar *URI = NULL;
11519 int nsNr = ctxt->nsNr;
11520
11521 if ((avail < 2) && (ctxt->inputNr == 1))
11522 goto done;
11523 cur = ctxt->input->cur[0];
11524 if (cur != '<') {
11525 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11526 xmlHaltParser(ctxt);
11527 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11528 ctxt->sax->endDocument(ctxt->userData);
11529 goto done;
11530 }
11531 if (!terminate) {
11532 if (ctxt->progressive) {
11533 /* > can be found unescaped in attribute values */
11534 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11535 goto done;
11536 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11537 goto done;
11538 }
11539 }
11540 if (ctxt->spaceNr == 0)
11541 spacePush(ctxt, -1);
11542 else if (*ctxt->space == -2)
11543 spacePush(ctxt, -1);
11544 else
11545 spacePush(ctxt, *ctxt->space);
11546 #ifdef LIBXML_SAX1_ENABLED
11547 if (ctxt->sax2)
11548 #endif /* LIBXML_SAX1_ENABLED */
11549 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11550 #ifdef LIBXML_SAX1_ENABLED
11551 else
11552 name = xmlParseStartTag(ctxt);
11553 #endif /* LIBXML_SAX1_ENABLED */
11554 if (ctxt->instate == XML_PARSER_EOF)
11555 goto done;
11556 if (name == NULL) {
11557 spacePop(ctxt);
11558 xmlHaltParser(ctxt);
11559 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11560 ctxt->sax->endDocument(ctxt->userData);
11561 goto done;
11562 }
11563 #ifdef LIBXML_VALID_ENABLED
11564 /*
11565 * [ VC: Root Element Type ]
11566 * The Name in the document type declaration must match
11567 * the element type of the root element.
11568 */
11569 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11570 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11571 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11572 #endif /* LIBXML_VALID_ENABLED */
11573
11574 /*
11575 * Check for an Empty Element.
11576 */
11577 if ((RAW == '/') && (NXT(1) == '>')) {
11578 SKIP(2);
11579
11580 if (ctxt->sax2) {
11581 if ((ctxt->sax != NULL) &&
11582 (ctxt->sax->endElementNs != NULL) &&
11583 (!ctxt->disableSAX))
11584 ctxt->sax->endElementNs(ctxt->userData, name,
11585 prefix, URI);
11586 if (ctxt->nsNr - nsNr > 0)
11587 nsPop(ctxt, ctxt->nsNr - nsNr);
11588 #ifdef LIBXML_SAX1_ENABLED
11589 } else {
11590 if ((ctxt->sax != NULL) &&
11591 (ctxt->sax->endElement != NULL) &&
11592 (!ctxt->disableSAX))
11593 ctxt->sax->endElement(ctxt->userData, name);
11594 #endif /* LIBXML_SAX1_ENABLED */
11595 }
11596 if (ctxt->instate == XML_PARSER_EOF)
11597 goto done;
11598 spacePop(ctxt);
11599 if (ctxt->nameNr == 0) {
11600 ctxt->instate = XML_PARSER_EPILOG;
11601 } else {
11602 ctxt->instate = XML_PARSER_CONTENT;
11603 }
11604 ctxt->progressive = 1;
11605 break;
11606 }
11607 if (RAW == '>') {
11608 NEXT;
11609 } else {
11610 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11611 "Couldn't find end of Start Tag %s\n",
11612 name);
11613 nodePop(ctxt);
11614 spacePop(ctxt);
11615 }
11616 if (ctxt->sax2)
11617 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11618 #ifdef LIBXML_SAX1_ENABLED
11619 else
11620 namePush(ctxt, name);
11621 #endif /* LIBXML_SAX1_ENABLED */
11622
11623 ctxt->instate = XML_PARSER_CONTENT;
11624 ctxt->progressive = 1;
11625 break;
11626 }
11627 case XML_PARSER_CONTENT: {
11628 const xmlChar *test;
11629 unsigned int cons;
11630 if ((avail < 2) && (ctxt->inputNr == 1))
11631 goto done;
11632 cur = ctxt->input->cur[0];
11633 next = ctxt->input->cur[1];
11634
11635 test = CUR_PTR;
11636 cons = ctxt->input->consumed;
11637 if ((cur == '<') && (next == '/')) {
11638 ctxt->instate = XML_PARSER_END_TAG;
11639 break;
11640 } else if ((cur == '<') && (next == '?')) {
11641 if ((!terminate) &&
11642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11643 ctxt->progressive = XML_PARSER_PI;
11644 goto done;
11645 }
11646 xmlParsePI(ctxt);
11647 ctxt->instate = XML_PARSER_CONTENT;
11648 ctxt->progressive = 1;
11649 } else if ((cur == '<') && (next != '!')) {
11650 ctxt->instate = XML_PARSER_START_TAG;
11651 break;
11652 } else if ((cur == '<') && (next == '!') &&
11653 (ctxt->input->cur[2] == '-') &&
11654 (ctxt->input->cur[3] == '-')) {
11655 int term;
11656
11657 if (avail < 4)
11658 goto done;
11659 ctxt->input->cur += 4;
11660 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11661 ctxt->input->cur -= 4;
11662 if ((!terminate) && (term < 0)) {
11663 ctxt->progressive = XML_PARSER_COMMENT;
11664 goto done;
11665 }
11666 xmlParseComment(ctxt);
11667 ctxt->instate = XML_PARSER_CONTENT;
11668 ctxt->progressive = 1;
11669 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11670 (ctxt->input->cur[2] == '[') &&
11671 (ctxt->input->cur[3] == 'C') &&
11672 (ctxt->input->cur[4] == 'D') &&
11673 (ctxt->input->cur[5] == 'A') &&
11674 (ctxt->input->cur[6] == 'T') &&
11675 (ctxt->input->cur[7] == 'A') &&
11676 (ctxt->input->cur[8] == '[')) {
11677 SKIP(9);
11678 ctxt->instate = XML_PARSER_CDATA_SECTION;
11679 break;
11680 } else if ((cur == '<') && (next == '!') &&
11681 (avail < 9)) {
11682 goto done;
11683 } else if (cur == '&') {
11684 if ((!terminate) &&
11685 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11686 goto done;
11687 xmlParseReference(ctxt);
11688 } else {
11689 /* TODO Avoid the extra copy, handle directly !!! */
11690 /*
11691 * Goal of the following test is:
11692 * - minimize calls to the SAX 'character' callback
11693 * when they are mergeable
11694 * - handle an problem for isBlank when we only parse
11695 * a sequence of blank chars and the next one is
11696 * not available to check against '<' presence.
11697 * - tries to homogenize the differences in SAX
11698 * callbacks between the push and pull versions
11699 * of the parser.
11700 */
11701 if ((ctxt->inputNr == 1) &&
11702 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11703 if (!terminate) {
11704 if (ctxt->progressive) {
11705 if ((lastlt == NULL) ||
11706 (ctxt->input->cur > lastlt))
11707 goto done;
11708 } else if (xmlParseLookupSequence(ctxt,
11709 '<', 0, 0) < 0) {
11710 goto done;
11711 }
11712 }
11713 }
11714 ctxt->checkIndex = 0;
11715 xmlParseCharData(ctxt, 0);
11716 }
11717 /*
11718 * Pop-up of finished entities.
11719 */
11720 while ((RAW == 0) && (ctxt->inputNr > 1))
11721 xmlPopInput(ctxt);
11722 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11723 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11724 "detected an error in element content\n");
11725 xmlHaltParser(ctxt);
11726 break;
11727 }
11728 break;
11729 }
11730 case XML_PARSER_END_TAG:
11731 if (avail < 2)
11732 goto done;
11733 if (!terminate) {
11734 if (ctxt->progressive) {
11735 /* > can be found unescaped in attribute values */
11736 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11737 goto done;
11738 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11739 goto done;
11740 }
11741 }
11742 if (ctxt->sax2) {
11743 xmlParseEndTag2(ctxt,
11744 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11745 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11746 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11747 nameNsPop(ctxt);
11748 }
11749 #ifdef LIBXML_SAX1_ENABLED
11750 else
11751 xmlParseEndTag1(ctxt, 0);
11752 #endif /* LIBXML_SAX1_ENABLED */
11753 if (ctxt->instate == XML_PARSER_EOF) {
11754 /* Nothing */
11755 } else if (ctxt->nameNr == 0) {
11756 ctxt->instate = XML_PARSER_EPILOG;
11757 } else {
11758 ctxt->instate = XML_PARSER_CONTENT;
11759 }
11760 break;
11761 case XML_PARSER_CDATA_SECTION: {
11762 /*
11763 * The Push mode need to have the SAX callback for
11764 * cdataBlock merge back contiguous callbacks.
11765 */
11766 int base;
11767
11768 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11769 if (base < 0) {
11770 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11771 int tmp;
11772
11773 tmp = xmlCheckCdataPush(ctxt->input->cur,
11774 XML_PARSER_BIG_BUFFER_SIZE);
11775 if (tmp < 0) {
11776 tmp = -tmp;
11777 ctxt->input->cur += tmp;
11778 goto encoding_error;
11779 }
11780 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11781 if (ctxt->sax->cdataBlock != NULL)
11782 ctxt->sax->cdataBlock(ctxt->userData,
11783 ctxt->input->cur, tmp);
11784 else if (ctxt->sax->characters != NULL)
11785 ctxt->sax->characters(ctxt->userData,
11786 ctxt->input->cur, tmp);
11787 }
11788 if (ctxt->instate == XML_PARSER_EOF)
11789 goto done;
11790 SKIPL(tmp);
11791 ctxt->checkIndex = 0;
11792 }
11793 goto done;
11794 } else {
11795 int tmp;
11796
11797 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11798 if ((tmp < 0) || (tmp != base)) {
11799 tmp = -tmp;
11800 ctxt->input->cur += tmp;
11801 goto encoding_error;
11802 }
11803 if ((ctxt->sax != NULL) && (base == 0) &&
11804 (ctxt->sax->cdataBlock != NULL) &&
11805 (!ctxt->disableSAX)) {
11806 /*
11807 * Special case to provide identical behaviour
11808 * between pull and push parsers on enpty CDATA
11809 * sections
11810 */
11811 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11812 (!strncmp((const char *)&ctxt->input->cur[-9],
11813 "<![CDATA[", 9)))
11814 ctxt->sax->cdataBlock(ctxt->userData,
11815 BAD_CAST "", 0);
11816 } else if ((ctxt->sax != NULL) && (base > 0) &&
11817 (!ctxt->disableSAX)) {
11818 if (ctxt->sax->cdataBlock != NULL)
11819 ctxt->sax->cdataBlock(ctxt->userData,
11820 ctxt->input->cur, base);
11821 else if (ctxt->sax->characters != NULL)
11822 ctxt->sax->characters(ctxt->userData,
11823 ctxt->input->cur, base);
11824 }
11825 if (ctxt->instate == XML_PARSER_EOF)
11826 goto done;
11827 SKIPL(base + 3);
11828 ctxt->checkIndex = 0;
11829 ctxt->instate = XML_PARSER_CONTENT;
11830 #ifdef DEBUG_PUSH
11831 xmlGenericError(xmlGenericErrorContext,
11832 "PP: entering CONTENT\n");
11833 #endif
11834 }
11835 break;
11836 }
11837 case XML_PARSER_MISC:
11838 SKIP_BLANKS;
11839 if (ctxt->input->buf == NULL)
11840 avail = ctxt->input->length -
11841 (ctxt->input->cur - ctxt->input->base);
11842 else
11843 avail = xmlBufUse(ctxt->input->buf->buffer) -
11844 (ctxt->input->cur - ctxt->input->base);
11845 if (avail < 2)
11846 goto done;
11847 cur = ctxt->input->cur[0];
11848 next = ctxt->input->cur[1];
11849 if ((cur == '<') && (next == '?')) {
11850 if ((!terminate) &&
11851 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11852 ctxt->progressive = XML_PARSER_PI;
11853 goto done;
11854 }
11855 #ifdef DEBUG_PUSH
11856 xmlGenericError(xmlGenericErrorContext,
11857 "PP: Parsing PI\n");
11858 #endif
11859 xmlParsePI(ctxt);
11860 if (ctxt->instate == XML_PARSER_EOF)
11861 goto done;
11862 ctxt->instate = XML_PARSER_MISC;
11863 ctxt->progressive = 1;
11864 ctxt->checkIndex = 0;
11865 } else if ((cur == '<') && (next == '!') &&
11866 (ctxt->input->cur[2] == '-') &&
11867 (ctxt->input->cur[3] == '-')) {
11868 if ((!terminate) &&
11869 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11870 ctxt->progressive = XML_PARSER_COMMENT;
11871 goto done;
11872 }
11873 #ifdef DEBUG_PUSH
11874 xmlGenericError(xmlGenericErrorContext,
11875 "PP: Parsing Comment\n");
11876 #endif
11877 xmlParseComment(ctxt);
11878 if (ctxt->instate == XML_PARSER_EOF)
11879 goto done;
11880 ctxt->instate = XML_PARSER_MISC;
11881 ctxt->progressive = 1;
11882 ctxt->checkIndex = 0;
11883 } else if ((cur == '<') && (next == '!') &&
11884 (ctxt->input->cur[2] == 'D') &&
11885 (ctxt->input->cur[3] == 'O') &&
11886 (ctxt->input->cur[4] == 'C') &&
11887 (ctxt->input->cur[5] == 'T') &&
11888 (ctxt->input->cur[6] == 'Y') &&
11889 (ctxt->input->cur[7] == 'P') &&
11890 (ctxt->input->cur[8] == 'E')) {
11891 if ((!terminate) &&
11892 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11893 ctxt->progressive = XML_PARSER_DTD;
11894 goto done;
11895 }
11896 #ifdef DEBUG_PUSH
11897 xmlGenericError(xmlGenericErrorContext,
11898 "PP: Parsing internal subset\n");
11899 #endif
11900 ctxt->inSubset = 1;
11901 ctxt->progressive = 0;
11902 ctxt->checkIndex = 0;
11903 xmlParseDocTypeDecl(ctxt);
11904 if (ctxt->instate == XML_PARSER_EOF)
11905 goto done;
11906 if (RAW == '[') {
11907 ctxt->instate = XML_PARSER_DTD;
11908 #ifdef DEBUG_PUSH
11909 xmlGenericError(xmlGenericErrorContext,
11910 "PP: entering DTD\n");
11911 #endif
11912 } else {
11913 /*
11914 * Create and update the external subset.
11915 */
11916 ctxt->inSubset = 2;
11917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11918 (ctxt->sax->externalSubset != NULL))
11919 ctxt->sax->externalSubset(ctxt->userData,
11920 ctxt->intSubName, ctxt->extSubSystem,
11921 ctxt->extSubURI);
11922 ctxt->inSubset = 0;
11923 xmlCleanSpecialAttr(ctxt);
11924 ctxt->instate = XML_PARSER_PROLOG;
11925 #ifdef DEBUG_PUSH
11926 xmlGenericError(xmlGenericErrorContext,
11927 "PP: entering PROLOG\n");
11928 #endif
11929 }
11930 } else if ((cur == '<') && (next == '!') &&
11931 (avail < 9)) {
11932 goto done;
11933 } else {
11934 ctxt->instate = XML_PARSER_START_TAG;
11935 ctxt->progressive = XML_PARSER_START_TAG;
11936 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11937 #ifdef DEBUG_PUSH
11938 xmlGenericError(xmlGenericErrorContext,
11939 "PP: entering START_TAG\n");
11940 #endif
11941 }
11942 break;
11943 case XML_PARSER_PROLOG:
11944 SKIP_BLANKS;
11945 if (ctxt->input->buf == NULL)
11946 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11947 else
11948 avail = xmlBufUse(ctxt->input->buf->buffer) -
11949 (ctxt->input->cur - ctxt->input->base);
11950 if (avail < 2)
11951 goto done;
11952 cur = ctxt->input->cur[0];
11953 next = ctxt->input->cur[1];
11954 if ((cur == '<') && (next == '?')) {
11955 if ((!terminate) &&
11956 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11957 ctxt->progressive = XML_PARSER_PI;
11958 goto done;
11959 }
11960 #ifdef DEBUG_PUSH
11961 xmlGenericError(xmlGenericErrorContext,
11962 "PP: Parsing PI\n");
11963 #endif
11964 xmlParsePI(ctxt);
11965 if (ctxt->instate == XML_PARSER_EOF)
11966 goto done;
11967 ctxt->instate = XML_PARSER_PROLOG;
11968 ctxt->progressive = 1;
11969 } else if ((cur == '<') && (next == '!') &&
11970 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11971 if ((!terminate) &&
11972 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11973 ctxt->progressive = XML_PARSER_COMMENT;
11974 goto done;
11975 }
11976 #ifdef DEBUG_PUSH
11977 xmlGenericError(xmlGenericErrorContext,
11978 "PP: Parsing Comment\n");
11979 #endif
11980 xmlParseComment(ctxt);
11981 if (ctxt->instate == XML_PARSER_EOF)
11982 goto done;
11983 ctxt->instate = XML_PARSER_PROLOG;
11984 ctxt->progressive = 1;
11985 } else if ((cur == '<') && (next == '!') &&
11986 (avail < 4)) {
11987 goto done;
11988 } else {
11989 ctxt->instate = XML_PARSER_START_TAG;
11990 if (ctxt->progressive == 0)
11991 ctxt->progressive = XML_PARSER_START_TAG;
11992 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11993 #ifdef DEBUG_PUSH
11994 xmlGenericError(xmlGenericErrorContext,
11995 "PP: entering START_TAG\n");
11996 #endif
11997 }
11998 break;
11999 case XML_PARSER_EPILOG:
12000 SKIP_BLANKS;
12001 if (ctxt->input->buf == NULL)
12002 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12003 else
12004 avail = xmlBufUse(ctxt->input->buf->buffer) -
12005 (ctxt->input->cur - ctxt->input->base);
12006 if (avail < 2)
12007 goto done;
12008 cur = ctxt->input->cur[0];
12009 next = ctxt->input->cur[1];
12010 if ((cur == '<') && (next == '?')) {
12011 if ((!terminate) &&
12012 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12013 ctxt->progressive = XML_PARSER_PI;
12014 goto done;
12015 }
12016 #ifdef DEBUG_PUSH
12017 xmlGenericError(xmlGenericErrorContext,
12018 "PP: Parsing PI\n");
12019 #endif
12020 xmlParsePI(ctxt);
12021 if (ctxt->instate == XML_PARSER_EOF)
12022 goto done;
12023 ctxt->instate = XML_PARSER_EPILOG;
12024 ctxt->progressive = 1;
12025 } else if ((cur == '<') && (next == '!') &&
12026 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12027 if ((!terminate) &&
12028 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12029 ctxt->progressive = XML_PARSER_COMMENT;
12030 goto done;
12031 }
12032 #ifdef DEBUG_PUSH
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: Parsing Comment\n");
12035 #endif
12036 xmlParseComment(ctxt);
12037 if (ctxt->instate == XML_PARSER_EOF)
12038 goto done;
12039 ctxt->instate = XML_PARSER_EPILOG;
12040 ctxt->progressive = 1;
12041 } else if ((cur == '<') && (next == '!') &&
12042 (avail < 4)) {
12043 goto done;
12044 } else {
12045 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12046 xmlHaltParser(ctxt);
12047 #ifdef DEBUG_PUSH
12048 xmlGenericError(xmlGenericErrorContext,
12049 "PP: entering EOF\n");
12050 #endif
12051 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12052 ctxt->sax->endDocument(ctxt->userData);
12053 goto done;
12054 }
12055 break;
12056 case XML_PARSER_DTD: {
12057 /*
12058 * Sorry but progressive parsing of the internal subset
12059 * is not expected to be supported. We first check that
12060 * the full content of the internal subset is available and
12061 * the parsing is launched only at that point.
12062 * Internal subset ends up with "']' S? '>'" in an unescaped
12063 * section and not in a ']]>' sequence which are conditional
12064 * sections (whoever argued to keep that crap in XML deserve
12065 * a place in hell !).
12066 */
12067 int base, i;
12068 xmlChar *buf;
12069 xmlChar quote = 0;
12070 size_t use;
12071
12072 base = ctxt->input->cur - ctxt->input->base;
12073 if (base < 0) return(0);
12074 if (ctxt->checkIndex > base)
12075 base = ctxt->checkIndex;
12076 buf = xmlBufContent(ctxt->input->buf->buffer);
12077 use = xmlBufUse(ctxt->input->buf->buffer);
12078 for (;(unsigned int) base < use; base++) {
12079 if (quote != 0) {
12080 if (buf[base] == quote)
12081 quote = 0;
12082 continue;
12083 }
12084 if ((quote == 0) && (buf[base] == '<')) {
12085 int found = 0;
12086 /* special handling of comments */
12087 if (((unsigned int) base + 4 < use) &&
12088 (buf[base + 1] == '!') &&
12089 (buf[base + 2] == '-') &&
12090 (buf[base + 3] == '-')) {
12091 for (;(unsigned int) base + 3 < use; base++) {
12092 if ((buf[base] == '-') &&
12093 (buf[base + 1] == '-') &&
12094 (buf[base + 2] == '>')) {
12095 found = 1;
12096 base += 2;
12097 break;
12098 }
12099 }
12100 if (!found) {
12101 #if 0
12102 fprintf(stderr, "unfinished comment\n");
12103 #endif
12104 break; /* for */
12105 }
12106 continue;
12107 }
12108 }
12109 if (buf[base] == '"') {
12110 quote = '"';
12111 continue;
12112 }
12113 if (buf[base] == '\'') {
12114 quote = '\'';
12115 continue;
12116 }
12117 if (buf[base] == ']') {
12118 #if 0
12119 fprintf(stderr, "%c%c%c%c: ", buf[base],
12120 buf[base + 1], buf[base + 2], buf[base + 3]);
12121 #endif
12122 if ((unsigned int) base +1 >= use)
12123 break;
12124 if (buf[base + 1] == ']') {
12125 /* conditional crap, skip both ']' ! */
12126 base++;
12127 continue;
12128 }
12129 for (i = 1; (unsigned int) base + i < use; i++) {
12130 if (buf[base + i] == '>') {
12131 #if 0
12132 fprintf(stderr, "found\n");
12133 #endif
12134 goto found_end_int_subset;
12135 }
12136 if (!IS_BLANK_CH(buf[base + i])) {
12137 #if 0
12138 fprintf(stderr, "not found\n");
12139 #endif
12140 goto not_end_of_int_subset;
12141 }
12142 }
12143 #if 0
12144 fprintf(stderr, "end of stream\n");
12145 #endif
12146 break;
12147
12148 }
12149 not_end_of_int_subset:
12150 continue; /* for */
12151 }
12152 /*
12153 * We didn't found the end of the Internal subset
12154 */
12155 if (quote == 0)
12156 ctxt->checkIndex = base;
12157 else
12158 ctxt->checkIndex = 0;
12159 #ifdef DEBUG_PUSH
12160 if (next == 0)
12161 xmlGenericError(xmlGenericErrorContext,
12162 "PP: lookup of int subset end filed\n");
12163 #endif
12164 goto done;
12165
12166 found_end_int_subset:
12167 ctxt->checkIndex = 0;
12168 xmlParseInternalSubset(ctxt);
12169 if (ctxt->instate == XML_PARSER_EOF)
12170 goto done;
12171 ctxt->inSubset = 2;
12172 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12173 (ctxt->sax->externalSubset != NULL))
12174 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12175 ctxt->extSubSystem, ctxt->extSubURI);
12176 ctxt->inSubset = 0;
12177 xmlCleanSpecialAttr(ctxt);
12178 if (ctxt->instate == XML_PARSER_EOF)
12179 goto done;
12180 ctxt->instate = XML_PARSER_PROLOG;
12181 ctxt->checkIndex = 0;
12182 #ifdef DEBUG_PUSH
12183 xmlGenericError(xmlGenericErrorContext,
12184 "PP: entering PROLOG\n");
12185 #endif
12186 break;
12187 }
12188 case XML_PARSER_COMMENT:
12189 xmlGenericError(xmlGenericErrorContext,
12190 "PP: internal error, state == COMMENT\n");
12191 ctxt->instate = XML_PARSER_CONTENT;
12192 #ifdef DEBUG_PUSH
12193 xmlGenericError(xmlGenericErrorContext,
12194 "PP: entering CONTENT\n");
12195 #endif
12196 break;
12197 case XML_PARSER_IGNORE:
12198 xmlGenericError(xmlGenericErrorContext,
12199 "PP: internal error, state == IGNORE");
12200 ctxt->instate = XML_PARSER_DTD;
12201 #ifdef DEBUG_PUSH
12202 xmlGenericError(xmlGenericErrorContext,
12203 "PP: entering DTD\n");
12204 #endif
12205 break;
12206 case XML_PARSER_PI:
12207 xmlGenericError(xmlGenericErrorContext,
12208 "PP: internal error, state == PI\n");
12209 ctxt->instate = XML_PARSER_CONTENT;
12210 #ifdef DEBUG_PUSH
12211 xmlGenericError(xmlGenericErrorContext,
12212 "PP: entering CONTENT\n");
12213 #endif
12214 break;
12215 case XML_PARSER_ENTITY_DECL:
12216 xmlGenericError(xmlGenericErrorContext,
12217 "PP: internal error, state == ENTITY_DECL\n");
12218 ctxt->instate = XML_PARSER_DTD;
12219 #ifdef DEBUG_PUSH
12220 xmlGenericError(xmlGenericErrorContext,
12221 "PP: entering DTD\n");
12222 #endif
12223 break;
12224 case XML_PARSER_ENTITY_VALUE:
12225 xmlGenericError(xmlGenericErrorContext,
12226 "PP: internal error, state == ENTITY_VALUE\n");
12227 ctxt->instate = XML_PARSER_CONTENT;
12228 #ifdef DEBUG_PUSH
12229 xmlGenericError(xmlGenericErrorContext,
12230 "PP: entering DTD\n");
12231 #endif
12232 break;
12233 case XML_PARSER_ATTRIBUTE_VALUE:
12234 xmlGenericError(xmlGenericErrorContext,
12235 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12236 ctxt->instate = XML_PARSER_START_TAG;
12237 #ifdef DEBUG_PUSH
12238 xmlGenericError(xmlGenericErrorContext,
12239 "PP: entering START_TAG\n");
12240 #endif
12241 break;
12242 case XML_PARSER_SYSTEM_LITERAL:
12243 xmlGenericError(xmlGenericErrorContext,
12244 "PP: internal error, state == SYSTEM_LITERAL\n");
12245 ctxt->instate = XML_PARSER_START_TAG;
12246 #ifdef DEBUG_PUSH
12247 xmlGenericError(xmlGenericErrorContext,
12248 "PP: entering START_TAG\n");
12249 #endif
12250 break;
12251 case XML_PARSER_PUBLIC_LITERAL:
12252 xmlGenericError(xmlGenericErrorContext,
12253 "PP: internal error, state == PUBLIC_LITERAL\n");
12254 ctxt->instate = XML_PARSER_START_TAG;
12255 #ifdef DEBUG_PUSH
12256 xmlGenericError(xmlGenericErrorContext,
12257 "PP: entering START_TAG\n");
12258 #endif
12259 break;
12260 }
12261 }
12262 done:
12263 #ifdef DEBUG_PUSH
12264 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12265 #endif
12266 return(ret);
12267 encoding_error:
12268 {
12269 char buffer[150];
12270
12271 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12272 ctxt->input->cur[0], ctxt->input->cur[1],
12273 ctxt->input->cur[2], ctxt->input->cur[3]);
12274 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12275 "Input is not proper UTF-8, indicate encoding !\n%s",
12276 BAD_CAST buffer, NULL);
12277 }
12278 return(0);
12279 }
12280
12281 /**
12282 * xmlParseCheckTransition:
12283 * @ctxt: an XML parser context
12284 * @chunk: a char array
12285 * @size: the size in byte of the chunk
12286 *
12287 * Check depending on the current parser state if the chunk given must be
12288 * processed immediately or one need more data to advance on parsing.
12289 *
12290 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12291 */
12292 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12293 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12294 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12295 return(-1);
12296 if (ctxt->instate == XML_PARSER_START_TAG) {
12297 if (memchr(chunk, '>', size) != NULL)
12298 return(1);
12299 return(0);
12300 }
12301 if (ctxt->progressive == XML_PARSER_COMMENT) {
12302 if (memchr(chunk, '>', size) != NULL)
12303 return(1);
12304 return(0);
12305 }
12306 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12307 if (memchr(chunk, '>', size) != NULL)
12308 return(1);
12309 return(0);
12310 }
12311 if (ctxt->progressive == XML_PARSER_PI) {
12312 if (memchr(chunk, '>', size) != NULL)
12313 return(1);
12314 return(0);
12315 }
12316 if (ctxt->instate == XML_PARSER_END_TAG) {
12317 if (memchr(chunk, '>', size) != NULL)
12318 return(1);
12319 return(0);
12320 }
12321 if ((ctxt->progressive == XML_PARSER_DTD) ||
12322 (ctxt->instate == XML_PARSER_DTD)) {
12323 if (memchr(chunk, '>', size) != NULL)
12324 return(1);
12325 return(0);
12326 }
12327 return(1);
12328 }
12329
12330 /**
12331 * xmlParseChunk:
12332 * @ctxt: an XML parser context
12333 * @chunk: an char array
12334 * @size: the size in byte of the chunk
12335 * @terminate: last chunk indicator
12336 *
12337 * Parse a Chunk of memory
12338 *
12339 * Returns zero if no error, the xmlParserErrors otherwise.
12340 */
12341 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12342 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12343 int terminate) {
12344 int end_in_lf = 0;
12345 int remain = 0;
12346 size_t old_avail = 0;
12347 size_t avail = 0;
12348
12349 if (ctxt == NULL)
12350 return(XML_ERR_INTERNAL_ERROR);
12351 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12352 return(ctxt->errNo);
12353 if (ctxt->instate == XML_PARSER_EOF)
12354 return(-1);
12355 if (ctxt->instate == XML_PARSER_START)
12356 xmlDetectSAX2(ctxt);
12357 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12358 (chunk[size - 1] == '\r')) {
12359 end_in_lf = 1;
12360 size--;
12361 }
12362
12363 xmldecl_done:
12364
12365 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12366 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12367 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12368 size_t cur = ctxt->input->cur - ctxt->input->base;
12369 int res;
12370
12371 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12372 /*
12373 * Specific handling if we autodetected an encoding, we should not
12374 * push more than the first line ... which depend on the encoding
12375 * And only push the rest once the final encoding was detected
12376 */
12377 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12378 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12379 unsigned int len = 45;
12380
12381 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12382 BAD_CAST "UTF-16")) ||
12383 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12384 BAD_CAST "UTF16")))
12385 len = 90;
12386 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12387 BAD_CAST "UCS-4")) ||
12388 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12389 BAD_CAST "UCS4")))
12390 len = 180;
12391
12392 if (ctxt->input->buf->rawconsumed < len)
12393 len -= ctxt->input->buf->rawconsumed;
12394
12395 /*
12396 * Change size for reading the initial declaration only
12397 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12398 * will blindly copy extra bytes from memory.
12399 */
12400 if ((unsigned int) size > len) {
12401 remain = size - len;
12402 size = len;
12403 } else {
12404 remain = 0;
12405 }
12406 }
12407 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12408 if (res < 0) {
12409 ctxt->errNo = XML_PARSER_EOF;
12410 xmlHaltParser(ctxt);
12411 return (XML_PARSER_EOF);
12412 }
12413 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12414 #ifdef DEBUG_PUSH
12415 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12416 #endif
12417
12418 } else if (ctxt->instate != XML_PARSER_EOF) {
12419 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12420 xmlParserInputBufferPtr in = ctxt->input->buf;
12421 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12422 (in->raw != NULL)) {
12423 int nbchars;
12424 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12425 size_t current = ctxt->input->cur - ctxt->input->base;
12426
12427 nbchars = xmlCharEncInput(in, terminate);
12428 if (nbchars < 0) {
12429 /* TODO 2.6.0 */
12430 xmlGenericError(xmlGenericErrorContext,
12431 "xmlParseChunk: encoder error\n");
12432 return(XML_ERR_INVALID_ENCODING);
12433 }
12434 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12435 }
12436 }
12437 }
12438 if (remain != 0) {
12439 xmlParseTryOrFinish(ctxt, 0);
12440 } else {
12441 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12442 avail = xmlBufUse(ctxt->input->buf->buffer);
12443 /*
12444 * Depending on the current state it may not be such
12445 * a good idea to try parsing if there is nothing in the chunk
12446 * which would be worth doing a parser state transition and we
12447 * need to wait for more data
12448 */
12449 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12450 (old_avail == 0) || (avail == 0) ||
12451 (xmlParseCheckTransition(ctxt,
12452 (const char *)&ctxt->input->base[old_avail],
12453 avail - old_avail)))
12454 xmlParseTryOrFinish(ctxt, terminate);
12455 }
12456 if (ctxt->instate == XML_PARSER_EOF)
12457 return(ctxt->errNo);
12458
12459 if ((ctxt->input != NULL) &&
12460 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12461 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12462 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12463 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12464 xmlHaltParser(ctxt);
12465 }
12466 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12467 return(ctxt->errNo);
12468
12469 if (remain != 0) {
12470 chunk += size;
12471 size = remain;
12472 remain = 0;
12473 goto xmldecl_done;
12474 }
12475 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12476 (ctxt->input->buf != NULL)) {
12477 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12478 ctxt->input);
12479 size_t current = ctxt->input->cur - ctxt->input->base;
12480
12481 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12482
12483 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12484 base, current);
12485 }
12486 if (terminate) {
12487 /*
12488 * Check for termination
12489 */
12490 int cur_avail = 0;
12491
12492 if (ctxt->input != NULL) {
12493 if (ctxt->input->buf == NULL)
12494 cur_avail = ctxt->input->length -
12495 (ctxt->input->cur - ctxt->input->base);
12496 else
12497 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12498 (ctxt->input->cur - ctxt->input->base);
12499 }
12500
12501 if ((ctxt->instate != XML_PARSER_EOF) &&
12502 (ctxt->instate != XML_PARSER_EPILOG)) {
12503 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12504 }
12505 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12506 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12507 }
12508 if (ctxt->instate != XML_PARSER_EOF) {
12509 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12510 ctxt->sax->endDocument(ctxt->userData);
12511 }
12512 ctxt->instate = XML_PARSER_EOF;
12513 }
12514 if (ctxt->wellFormed == 0)
12515 return((xmlParserErrors) ctxt->errNo);
12516 else
12517 return(0);
12518 }
12519
12520 /************************************************************************
12521 * *
12522 * I/O front end functions to the parser *
12523 * *
12524 ************************************************************************/
12525
12526 /**
12527 * xmlCreatePushParserCtxt:
12528 * @sax: a SAX handler
12529 * @user_data: The user data returned on SAX callbacks
12530 * @chunk: a pointer to an array of chars
12531 * @size: number of chars in the array
12532 * @filename: an optional file name or URI
12533 *
12534 * Create a parser context for using the XML parser in push mode.
12535 * If @buffer and @size are non-NULL, the data is used to detect
12536 * the encoding. The remaining characters will be parsed so they
12537 * don't need to be fed in again through xmlParseChunk.
12538 * To allow content encoding detection, @size should be >= 4
12539 * The value of @filename is used for fetching external entities
12540 * and error/warning reports.
12541 *
12542 * Returns the new parser context or NULL
12543 */
12544
12545 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12546 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12547 const char *chunk, int size, const char *filename) {
12548 xmlParserCtxtPtr ctxt;
12549 xmlParserInputPtr inputStream;
12550 xmlParserInputBufferPtr buf;
12551 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12552
12553 /*
12554 * plug some encoding conversion routines
12555 */
12556 if ((chunk != NULL) && (size >= 4))
12557 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12558
12559 buf = xmlAllocParserInputBuffer(enc);
12560 if (buf == NULL) return(NULL);
12561
12562 ctxt = xmlNewParserCtxt();
12563 if (ctxt == NULL) {
12564 xmlErrMemory(NULL, "creating parser: out of memory\n");
12565 xmlFreeParserInputBuffer(buf);
12566 return(NULL);
12567 }
12568 ctxt->dictNames = 1;
12569 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12570 if (ctxt->pushTab == NULL) {
12571 xmlErrMemory(ctxt, NULL);
12572 xmlFreeParserInputBuffer(buf);
12573 xmlFreeParserCtxt(ctxt);
12574 return(NULL);
12575 }
12576 if (sax != NULL) {
12577 #ifdef LIBXML_SAX1_ENABLED
12578 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12579 #endif /* LIBXML_SAX1_ENABLED */
12580 xmlFree(ctxt->sax);
12581 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12582 if (ctxt->sax == NULL) {
12583 xmlErrMemory(ctxt, NULL);
12584 xmlFreeParserInputBuffer(buf);
12585 xmlFreeParserCtxt(ctxt);
12586 return(NULL);
12587 }
12588 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12589 if (sax->initialized == XML_SAX2_MAGIC)
12590 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12591 else
12592 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12593 if (user_data != NULL)
12594 ctxt->userData = user_data;
12595 }
12596 if (filename == NULL) {
12597 ctxt->directory = NULL;
12598 } else {
12599 ctxt->directory = xmlParserGetDirectory(filename);
12600 }
12601
12602 inputStream = xmlNewInputStream(ctxt);
12603 if (inputStream == NULL) {
12604 xmlFreeParserCtxt(ctxt);
12605 xmlFreeParserInputBuffer(buf);
12606 return(NULL);
12607 }
12608
12609 if (filename == NULL)
12610 inputStream->filename = NULL;
12611 else {
12612 inputStream->filename = (char *)
12613 xmlCanonicPath((const xmlChar *) filename);
12614 if (inputStream->filename == NULL) {
12615 xmlFreeParserCtxt(ctxt);
12616 xmlFreeParserInputBuffer(buf);
12617 return(NULL);
12618 }
12619 }
12620 inputStream->buf = buf;
12621 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12622 inputPush(ctxt, inputStream);
12623
12624 /*
12625 * If the caller didn't provide an initial 'chunk' for determining
12626 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12627 * that it can be automatically determined later
12628 */
12629 if ((size == 0) || (chunk == NULL)) {
12630 ctxt->charset = XML_CHAR_ENCODING_NONE;
12631 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12632 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12633 size_t cur = ctxt->input->cur - ctxt->input->base;
12634
12635 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12636
12637 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12638 #ifdef DEBUG_PUSH
12639 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12640 #endif
12641 }
12642
12643 if (enc != XML_CHAR_ENCODING_NONE) {
12644 xmlSwitchEncoding(ctxt, enc);
12645 }
12646
12647 return(ctxt);
12648 }
12649 #endif /* LIBXML_PUSH_ENABLED */
12650
12651 /**
12652 * xmlHaltParser:
12653 * @ctxt: an XML parser context
12654 *
12655 * Blocks further parser processing don't override error
12656 * for internal use
12657 */
12658 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12659 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12660 if (ctxt == NULL)
12661 return;
12662 ctxt->instate = XML_PARSER_EOF;
12663 ctxt->disableSAX = 1;
12664 if (ctxt->input != NULL) {
12665 /*
12666 * in case there was a specific allocation deallocate before
12667 * overriding base
12668 */
12669 if (ctxt->input->free != NULL) {
12670 ctxt->input->free((xmlChar *) ctxt->input->base);
12671 ctxt->input->free = NULL;
12672 }
12673 ctxt->input->cur = BAD_CAST"";
12674 ctxt->input->base = ctxt->input->cur;
12675 }
12676 }
12677
12678 /**
12679 * xmlStopParser:
12680 * @ctxt: an XML parser context
12681 *
12682 * Blocks further parser processing
12683 */
12684 void
xmlStopParser(xmlParserCtxtPtr ctxt)12685 xmlStopParser(xmlParserCtxtPtr ctxt) {
12686 if (ctxt == NULL)
12687 return;
12688 xmlHaltParser(ctxt);
12689 ctxt->errNo = XML_ERR_USER_STOP;
12690 }
12691
12692 /**
12693 * xmlCreateIOParserCtxt:
12694 * @sax: a SAX handler
12695 * @user_data: The user data returned on SAX callbacks
12696 * @ioread: an I/O read function
12697 * @ioclose: an I/O close function
12698 * @ioctx: an I/O handler
12699 * @enc: the charset encoding if known
12700 *
12701 * Create a parser context for using the XML parser with an existing
12702 * I/O stream
12703 *
12704 * Returns the new parser context or NULL
12705 */
12706 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12707 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12708 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12709 void *ioctx, xmlCharEncoding enc) {
12710 xmlParserCtxtPtr ctxt;
12711 xmlParserInputPtr inputStream;
12712 xmlParserInputBufferPtr buf;
12713
12714 if (ioread == NULL) return(NULL);
12715
12716 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12717 if (buf == NULL) {
12718 if (ioclose != NULL)
12719 ioclose(ioctx);
12720 return (NULL);
12721 }
12722
12723 ctxt = xmlNewParserCtxt();
12724 if (ctxt == NULL) {
12725 xmlFreeParserInputBuffer(buf);
12726 return(NULL);
12727 }
12728 if (sax != NULL) {
12729 #ifdef LIBXML_SAX1_ENABLED
12730 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12731 #endif /* LIBXML_SAX1_ENABLED */
12732 xmlFree(ctxt->sax);
12733 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12734 if (ctxt->sax == NULL) {
12735 xmlErrMemory(ctxt, NULL);
12736 xmlFreeParserCtxt(ctxt);
12737 return(NULL);
12738 }
12739 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12740 if (sax->initialized == XML_SAX2_MAGIC)
12741 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12742 else
12743 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12744 if (user_data != NULL)
12745 ctxt->userData = user_data;
12746 }
12747
12748 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12749 if (inputStream == NULL) {
12750 xmlFreeParserCtxt(ctxt);
12751 return(NULL);
12752 }
12753 inputPush(ctxt, inputStream);
12754
12755 return(ctxt);
12756 }
12757
12758 #ifdef LIBXML_VALID_ENABLED
12759 /************************************************************************
12760 * *
12761 * Front ends when parsing a DTD *
12762 * *
12763 ************************************************************************/
12764
12765 /**
12766 * xmlIOParseDTD:
12767 * @sax: the SAX handler block or NULL
12768 * @input: an Input Buffer
12769 * @enc: the charset encoding if known
12770 *
12771 * Load and parse a DTD
12772 *
12773 * Returns the resulting xmlDtdPtr or NULL in case of error.
12774 * @input will be freed by the function in any case.
12775 */
12776
12777 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12778 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12779 xmlCharEncoding enc) {
12780 xmlDtdPtr ret = NULL;
12781 xmlParserCtxtPtr ctxt;
12782 xmlParserInputPtr pinput = NULL;
12783 xmlChar start[4];
12784
12785 if (input == NULL)
12786 return(NULL);
12787
12788 ctxt = xmlNewParserCtxt();
12789 if (ctxt == NULL) {
12790 xmlFreeParserInputBuffer(input);
12791 return(NULL);
12792 }
12793
12794 /* We are loading a DTD */
12795 ctxt->options |= XML_PARSE_DTDLOAD;
12796
12797 /*
12798 * Set-up the SAX context
12799 */
12800 if (sax != NULL) {
12801 if (ctxt->sax != NULL)
12802 xmlFree(ctxt->sax);
12803 ctxt->sax = sax;
12804 ctxt->userData = ctxt;
12805 }
12806 xmlDetectSAX2(ctxt);
12807
12808 /*
12809 * generate a parser input from the I/O handler
12810 */
12811
12812 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12813 if (pinput == NULL) {
12814 if (sax != NULL) ctxt->sax = NULL;
12815 xmlFreeParserInputBuffer(input);
12816 xmlFreeParserCtxt(ctxt);
12817 return(NULL);
12818 }
12819
12820 /*
12821 * plug some encoding conversion routines here.
12822 */
12823 if (xmlPushInput(ctxt, pinput) < 0) {
12824 if (sax != NULL) ctxt->sax = NULL;
12825 xmlFreeParserCtxt(ctxt);
12826 return(NULL);
12827 }
12828 if (enc != XML_CHAR_ENCODING_NONE) {
12829 xmlSwitchEncoding(ctxt, enc);
12830 }
12831
12832 pinput->filename = NULL;
12833 pinput->line = 1;
12834 pinput->col = 1;
12835 pinput->base = ctxt->input->cur;
12836 pinput->cur = ctxt->input->cur;
12837 pinput->free = NULL;
12838
12839 /*
12840 * let's parse that entity knowing it's an external subset.
12841 */
12842 ctxt->inSubset = 2;
12843 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12844 if (ctxt->myDoc == NULL) {
12845 xmlErrMemory(ctxt, "New Doc failed");
12846 return(NULL);
12847 }
12848 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12849 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12850 BAD_CAST "none", BAD_CAST "none");
12851
12852 if ((enc == XML_CHAR_ENCODING_NONE) &&
12853 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12854 /*
12855 * Get the 4 first bytes and decode the charset
12856 * if enc != XML_CHAR_ENCODING_NONE
12857 * plug some encoding conversion routines.
12858 */
12859 start[0] = RAW;
12860 start[1] = NXT(1);
12861 start[2] = NXT(2);
12862 start[3] = NXT(3);
12863 enc = xmlDetectCharEncoding(start, 4);
12864 if (enc != XML_CHAR_ENCODING_NONE) {
12865 xmlSwitchEncoding(ctxt, enc);
12866 }
12867 }
12868
12869 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12870
12871 if (ctxt->myDoc != NULL) {
12872 if (ctxt->wellFormed) {
12873 ret = ctxt->myDoc->extSubset;
12874 ctxt->myDoc->extSubset = NULL;
12875 if (ret != NULL) {
12876 xmlNodePtr tmp;
12877
12878 ret->doc = NULL;
12879 tmp = ret->children;
12880 while (tmp != NULL) {
12881 tmp->doc = NULL;
12882 tmp = tmp->next;
12883 }
12884 }
12885 } else {
12886 ret = NULL;
12887 }
12888 xmlFreeDoc(ctxt->myDoc);
12889 ctxt->myDoc = NULL;
12890 }
12891 if (sax != NULL) ctxt->sax = NULL;
12892 xmlFreeParserCtxt(ctxt);
12893
12894 return(ret);
12895 }
12896
12897 /**
12898 * xmlSAXParseDTD:
12899 * @sax: the SAX handler block
12900 * @ExternalID: a NAME* containing the External ID of the DTD
12901 * @SystemID: a NAME* containing the URL to the DTD
12902 *
12903 * Load and parse an external subset.
12904 *
12905 * Returns the resulting xmlDtdPtr or NULL in case of error.
12906 */
12907
12908 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12909 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12910 const xmlChar *SystemID) {
12911 xmlDtdPtr ret = NULL;
12912 xmlParserCtxtPtr ctxt;
12913 xmlParserInputPtr input = NULL;
12914 xmlCharEncoding enc;
12915 xmlChar* systemIdCanonic;
12916
12917 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12918
12919 ctxt = xmlNewParserCtxt();
12920 if (ctxt == NULL) {
12921 return(NULL);
12922 }
12923
12924 /* We are loading a DTD */
12925 ctxt->options |= XML_PARSE_DTDLOAD;
12926
12927 /*
12928 * Set-up the SAX context
12929 */
12930 if (sax != NULL) {
12931 if (ctxt->sax != NULL)
12932 xmlFree(ctxt->sax);
12933 ctxt->sax = sax;
12934 ctxt->userData = ctxt;
12935 }
12936
12937 /*
12938 * Canonicalise the system ID
12939 */
12940 systemIdCanonic = xmlCanonicPath(SystemID);
12941 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12942 xmlFreeParserCtxt(ctxt);
12943 return(NULL);
12944 }
12945
12946 /*
12947 * Ask the Entity resolver to load the damn thing
12948 */
12949
12950 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12951 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12952 systemIdCanonic);
12953 if (input == NULL) {
12954 if (sax != NULL) ctxt->sax = NULL;
12955 xmlFreeParserCtxt(ctxt);
12956 if (systemIdCanonic != NULL)
12957 xmlFree(systemIdCanonic);
12958 return(NULL);
12959 }
12960
12961 /*
12962 * plug some encoding conversion routines here.
12963 */
12964 if (xmlPushInput(ctxt, input) < 0) {
12965 if (sax != NULL) ctxt->sax = NULL;
12966 xmlFreeParserCtxt(ctxt);
12967 if (systemIdCanonic != NULL)
12968 xmlFree(systemIdCanonic);
12969 return(NULL);
12970 }
12971 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12972 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12973 xmlSwitchEncoding(ctxt, enc);
12974 }
12975
12976 if (input->filename == NULL)
12977 input->filename = (char *) systemIdCanonic;
12978 else
12979 xmlFree(systemIdCanonic);
12980 input->line = 1;
12981 input->col = 1;
12982 input->base = ctxt->input->cur;
12983 input->cur = ctxt->input->cur;
12984 input->free = NULL;
12985
12986 /*
12987 * let's parse that entity knowing it's an external subset.
12988 */
12989 ctxt->inSubset = 2;
12990 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12991 if (ctxt->myDoc == NULL) {
12992 xmlErrMemory(ctxt, "New Doc failed");
12993 if (sax != NULL) ctxt->sax = NULL;
12994 xmlFreeParserCtxt(ctxt);
12995 return(NULL);
12996 }
12997 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12998 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12999 ExternalID, SystemID);
13000 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13001
13002 if (ctxt->myDoc != NULL) {
13003 if (ctxt->wellFormed) {
13004 ret = ctxt->myDoc->extSubset;
13005 ctxt->myDoc->extSubset = NULL;
13006 if (ret != NULL) {
13007 xmlNodePtr tmp;
13008
13009 ret->doc = NULL;
13010 tmp = ret->children;
13011 while (tmp != NULL) {
13012 tmp->doc = NULL;
13013 tmp = tmp->next;
13014 }
13015 }
13016 } else {
13017 ret = NULL;
13018 }
13019 xmlFreeDoc(ctxt->myDoc);
13020 ctxt->myDoc = NULL;
13021 }
13022 if (sax != NULL) ctxt->sax = NULL;
13023 xmlFreeParserCtxt(ctxt);
13024
13025 return(ret);
13026 }
13027
13028
13029 /**
13030 * xmlParseDTD:
13031 * @ExternalID: a NAME* containing the External ID of the DTD
13032 * @SystemID: a NAME* containing the URL to the DTD
13033 *
13034 * Load and parse an external subset.
13035 *
13036 * Returns the resulting xmlDtdPtr or NULL in case of error.
13037 */
13038
13039 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)13040 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13041 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13042 }
13043 #endif /* LIBXML_VALID_ENABLED */
13044
13045 /************************************************************************
13046 * *
13047 * Front ends when parsing an Entity *
13048 * *
13049 ************************************************************************/
13050
13051 /**
13052 * xmlParseCtxtExternalEntity:
13053 * @ctx: the existing parsing context
13054 * @URL: the URL for the entity to load
13055 * @ID: the System ID for the entity to load
13056 * @lst: the return value for the set of parsed nodes
13057 *
13058 * Parse an external general entity within an existing parsing context
13059 * An external general parsed entity is well-formed if it matches the
13060 * production labeled extParsedEnt.
13061 *
13062 * [78] extParsedEnt ::= TextDecl? content
13063 *
13064 * Returns 0 if the entity is well formed, -1 in case of args problem and
13065 * the parser error code otherwise
13066 */
13067
13068 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13069 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13070 const xmlChar *ID, xmlNodePtr *lst) {
13071 xmlParserCtxtPtr ctxt;
13072 xmlDocPtr newDoc;
13073 xmlNodePtr newRoot;
13074 xmlSAXHandlerPtr oldsax = NULL;
13075 int ret = 0;
13076 xmlChar start[4];
13077 xmlCharEncoding enc;
13078
13079 if (ctx == NULL) return(-1);
13080
13081 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13082 (ctx->depth > 1024)) {
13083 return(XML_ERR_ENTITY_LOOP);
13084 }
13085
13086 if (lst != NULL)
13087 *lst = NULL;
13088 if ((URL == NULL) && (ID == NULL))
13089 return(-1);
13090 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13091 return(-1);
13092
13093 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13094 if (ctxt == NULL) {
13095 return(-1);
13096 }
13097
13098 oldsax = ctxt->sax;
13099 ctxt->sax = ctx->sax;
13100 xmlDetectSAX2(ctxt);
13101 newDoc = xmlNewDoc(BAD_CAST "1.0");
13102 if (newDoc == NULL) {
13103 xmlFreeParserCtxt(ctxt);
13104 return(-1);
13105 }
13106 newDoc->properties = XML_DOC_INTERNAL;
13107 if (ctx->myDoc->dict) {
13108 newDoc->dict = ctx->myDoc->dict;
13109 xmlDictReference(newDoc->dict);
13110 }
13111 if (ctx->myDoc != NULL) {
13112 newDoc->intSubset = ctx->myDoc->intSubset;
13113 newDoc->extSubset = ctx->myDoc->extSubset;
13114 }
13115 if (ctx->myDoc->URL != NULL) {
13116 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13117 }
13118 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13119 if (newRoot == NULL) {
13120 ctxt->sax = oldsax;
13121 xmlFreeParserCtxt(ctxt);
13122 newDoc->intSubset = NULL;
13123 newDoc->extSubset = NULL;
13124 xmlFreeDoc(newDoc);
13125 return(-1);
13126 }
13127 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13128 nodePush(ctxt, newDoc->children);
13129 if (ctx->myDoc == NULL) {
13130 ctxt->myDoc = newDoc;
13131 } else {
13132 ctxt->myDoc = ctx->myDoc;
13133 newDoc->children->doc = ctx->myDoc;
13134 }
13135
13136 /*
13137 * Get the 4 first bytes and decode the charset
13138 * if enc != XML_CHAR_ENCODING_NONE
13139 * plug some encoding conversion routines.
13140 */
13141 GROW
13142 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13143 start[0] = RAW;
13144 start[1] = NXT(1);
13145 start[2] = NXT(2);
13146 start[3] = NXT(3);
13147 enc = xmlDetectCharEncoding(start, 4);
13148 if (enc != XML_CHAR_ENCODING_NONE) {
13149 xmlSwitchEncoding(ctxt, enc);
13150 }
13151 }
13152
13153 /*
13154 * Parse a possible text declaration first
13155 */
13156 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13157 xmlParseTextDecl(ctxt);
13158 /*
13159 * An XML-1.0 document can't reference an entity not XML-1.0
13160 */
13161 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13162 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13163 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13164 "Version mismatch between document and entity\n");
13165 }
13166 }
13167
13168 /*
13169 * If the user provided its own SAX callbacks then reuse the
13170 * useData callback field, otherwise the expected setup in a
13171 * DOM builder is to have userData == ctxt
13172 */
13173 if (ctx->userData == ctx)
13174 ctxt->userData = ctxt;
13175 else
13176 ctxt->userData = ctx->userData;
13177
13178 /*
13179 * Doing validity checking on chunk doesn't make sense
13180 */
13181 ctxt->instate = XML_PARSER_CONTENT;
13182 ctxt->validate = ctx->validate;
13183 ctxt->valid = ctx->valid;
13184 ctxt->loadsubset = ctx->loadsubset;
13185 ctxt->depth = ctx->depth + 1;
13186 ctxt->replaceEntities = ctx->replaceEntities;
13187 if (ctxt->validate) {
13188 ctxt->vctxt.error = ctx->vctxt.error;
13189 ctxt->vctxt.warning = ctx->vctxt.warning;
13190 } else {
13191 ctxt->vctxt.error = NULL;
13192 ctxt->vctxt.warning = NULL;
13193 }
13194 ctxt->vctxt.nodeTab = NULL;
13195 ctxt->vctxt.nodeNr = 0;
13196 ctxt->vctxt.nodeMax = 0;
13197 ctxt->vctxt.node = NULL;
13198 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13199 ctxt->dict = ctx->dict;
13200 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13201 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13202 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13203 ctxt->dictNames = ctx->dictNames;
13204 ctxt->attsDefault = ctx->attsDefault;
13205 ctxt->attsSpecial = ctx->attsSpecial;
13206 ctxt->linenumbers = ctx->linenumbers;
13207
13208 xmlParseContent(ctxt);
13209
13210 ctx->validate = ctxt->validate;
13211 ctx->valid = ctxt->valid;
13212 if ((RAW == '<') && (NXT(1) == '/')) {
13213 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13214 } else if (RAW != 0) {
13215 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13216 }
13217 if (ctxt->node != newDoc->children) {
13218 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13219 }
13220
13221 if (!ctxt->wellFormed) {
13222 if (ctxt->errNo == 0)
13223 ret = 1;
13224 else
13225 ret = ctxt->errNo;
13226 } else {
13227 if (lst != NULL) {
13228 xmlNodePtr cur;
13229
13230 /*
13231 * Return the newly created nodeset after unlinking it from
13232 * they pseudo parent.
13233 */
13234 cur = newDoc->children->children;
13235 *lst = cur;
13236 while (cur != NULL) {
13237 cur->parent = NULL;
13238 cur = cur->next;
13239 }
13240 newDoc->children->children = NULL;
13241 }
13242 ret = 0;
13243 }
13244 ctxt->sax = oldsax;
13245 ctxt->dict = NULL;
13246 ctxt->attsDefault = NULL;
13247 ctxt->attsSpecial = NULL;
13248 xmlFreeParserCtxt(ctxt);
13249 newDoc->intSubset = NULL;
13250 newDoc->extSubset = NULL;
13251 xmlFreeDoc(newDoc);
13252
13253 return(ret);
13254 }
13255
13256 /**
13257 * xmlParseExternalEntityPrivate:
13258 * @doc: the document the chunk pertains to
13259 * @oldctxt: the previous parser context if available
13260 * @sax: the SAX handler bloc (possibly NULL)
13261 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13262 * @depth: Used for loop detection, use 0
13263 * @URL: the URL for the entity to load
13264 * @ID: the System ID for the entity to load
13265 * @list: the return value for the set of parsed nodes
13266 *
13267 * Private version of xmlParseExternalEntity()
13268 *
13269 * Returns 0 if the entity is well formed, -1 in case of args problem and
13270 * the parser error code otherwise
13271 */
13272
13273 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13274 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13275 xmlSAXHandlerPtr sax,
13276 void *user_data, int depth, const xmlChar *URL,
13277 const xmlChar *ID, xmlNodePtr *list) {
13278 xmlParserCtxtPtr ctxt;
13279 xmlDocPtr newDoc;
13280 xmlNodePtr newRoot;
13281 xmlSAXHandlerPtr oldsax = NULL;
13282 xmlParserErrors ret = XML_ERR_OK;
13283 xmlChar start[4];
13284 xmlCharEncoding enc;
13285
13286 if (((depth > 40) &&
13287 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13288 (depth > 1024)) {
13289 return(XML_ERR_ENTITY_LOOP);
13290 }
13291
13292 if (list != NULL)
13293 *list = NULL;
13294 if ((URL == NULL) && (ID == NULL))
13295 return(XML_ERR_INTERNAL_ERROR);
13296 if (doc == NULL)
13297 return(XML_ERR_INTERNAL_ERROR);
13298
13299
13300 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13301 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13302 ctxt->userData = ctxt;
13303 if (oldctxt != NULL) {
13304 ctxt->_private = oldctxt->_private;
13305 ctxt->loadsubset = oldctxt->loadsubset;
13306 ctxt->validate = oldctxt->validate;
13307 ctxt->external = oldctxt->external;
13308 ctxt->record_info = oldctxt->record_info;
13309 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13310 ctxt->node_seq.length = oldctxt->node_seq.length;
13311 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13312 } else {
13313 /*
13314 * Doing validity checking on chunk without context
13315 * doesn't make sense
13316 */
13317 ctxt->_private = NULL;
13318 ctxt->validate = 0;
13319 ctxt->external = 2;
13320 ctxt->loadsubset = 0;
13321 }
13322 if (sax != NULL) {
13323 oldsax = ctxt->sax;
13324 ctxt->sax = sax;
13325 if (user_data != NULL)
13326 ctxt->userData = user_data;
13327 }
13328 xmlDetectSAX2(ctxt);
13329 newDoc = xmlNewDoc(BAD_CAST "1.0");
13330 if (newDoc == NULL) {
13331 ctxt->node_seq.maximum = 0;
13332 ctxt->node_seq.length = 0;
13333 ctxt->node_seq.buffer = NULL;
13334 xmlFreeParserCtxt(ctxt);
13335 return(XML_ERR_INTERNAL_ERROR);
13336 }
13337 newDoc->properties = XML_DOC_INTERNAL;
13338 newDoc->intSubset = doc->intSubset;
13339 newDoc->extSubset = doc->extSubset;
13340 newDoc->dict = doc->dict;
13341 xmlDictReference(newDoc->dict);
13342
13343 if (doc->URL != NULL) {
13344 newDoc->URL = xmlStrdup(doc->URL);
13345 }
13346 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13347 if (newRoot == NULL) {
13348 if (sax != NULL)
13349 ctxt->sax = oldsax;
13350 ctxt->node_seq.maximum = 0;
13351 ctxt->node_seq.length = 0;
13352 ctxt->node_seq.buffer = NULL;
13353 xmlFreeParserCtxt(ctxt);
13354 newDoc->intSubset = NULL;
13355 newDoc->extSubset = NULL;
13356 xmlFreeDoc(newDoc);
13357 return(XML_ERR_INTERNAL_ERROR);
13358 }
13359 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13360 nodePush(ctxt, newDoc->children);
13361 ctxt->myDoc = doc;
13362 newRoot->doc = doc;
13363
13364 /*
13365 * Get the 4 first bytes and decode the charset
13366 * if enc != XML_CHAR_ENCODING_NONE
13367 * plug some encoding conversion routines.
13368 */
13369 GROW;
13370 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13371 start[0] = RAW;
13372 start[1] = NXT(1);
13373 start[2] = NXT(2);
13374 start[3] = NXT(3);
13375 enc = xmlDetectCharEncoding(start, 4);
13376 if (enc != XML_CHAR_ENCODING_NONE) {
13377 xmlSwitchEncoding(ctxt, enc);
13378 }
13379 }
13380
13381 /*
13382 * Parse a possible text declaration first
13383 */
13384 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13385 xmlParseTextDecl(ctxt);
13386 }
13387
13388 ctxt->instate = XML_PARSER_CONTENT;
13389 ctxt->depth = depth;
13390
13391 xmlParseContent(ctxt);
13392
13393 if ((RAW == '<') && (NXT(1) == '/')) {
13394 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13395 } else if (RAW != 0) {
13396 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13397 }
13398 if (ctxt->node != newDoc->children) {
13399 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13400 }
13401
13402 if (!ctxt->wellFormed) {
13403 if (ctxt->errNo == 0)
13404 ret = XML_ERR_INTERNAL_ERROR;
13405 else
13406 ret = (xmlParserErrors)ctxt->errNo;
13407 } else {
13408 if (list != NULL) {
13409 xmlNodePtr cur;
13410
13411 /*
13412 * Return the newly created nodeset after unlinking it from
13413 * they pseudo parent.
13414 */
13415 cur = newDoc->children->children;
13416 *list = cur;
13417 while (cur != NULL) {
13418 cur->parent = NULL;
13419 cur = cur->next;
13420 }
13421 newDoc->children->children = NULL;
13422 }
13423 ret = XML_ERR_OK;
13424 }
13425
13426 /*
13427 * Record in the parent context the number of entities replacement
13428 * done when parsing that reference.
13429 */
13430 if (oldctxt != NULL)
13431 oldctxt->nbentities += ctxt->nbentities;
13432
13433 /*
13434 * Also record the size of the entity parsed
13435 */
13436 if (ctxt->input != NULL && oldctxt != NULL) {
13437 oldctxt->sizeentities += ctxt->input->consumed;
13438 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13439 }
13440 /*
13441 * And record the last error if any
13442 */
13443 if (ctxt->lastError.code != XML_ERR_OK)
13444 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13445
13446 if (sax != NULL)
13447 ctxt->sax = oldsax;
13448 if (oldctxt != NULL) {
13449 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13450 oldctxt->node_seq.length = ctxt->node_seq.length;
13451 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13452 }
13453 ctxt->node_seq.maximum = 0;
13454 ctxt->node_seq.length = 0;
13455 ctxt->node_seq.buffer = NULL;
13456 xmlFreeParserCtxt(ctxt);
13457 newDoc->intSubset = NULL;
13458 newDoc->extSubset = NULL;
13459 xmlFreeDoc(newDoc);
13460
13461 return(ret);
13462 }
13463
13464 #ifdef LIBXML_SAX1_ENABLED
13465 /**
13466 * xmlParseExternalEntity:
13467 * @doc: the document the chunk pertains to
13468 * @sax: the SAX handler bloc (possibly NULL)
13469 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13470 * @depth: Used for loop detection, use 0
13471 * @URL: the URL for the entity to load
13472 * @ID: the System ID for the entity to load
13473 * @lst: the return value for the set of parsed nodes
13474 *
13475 * Parse an external general entity
13476 * An external general parsed entity is well-formed if it matches the
13477 * production labeled extParsedEnt.
13478 *
13479 * [78] extParsedEnt ::= TextDecl? content
13480 *
13481 * Returns 0 if the entity is well formed, -1 in case of args problem and
13482 * the parser error code otherwise
13483 */
13484
13485 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13486 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13487 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13488 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13489 ID, lst));
13490 }
13491
13492 /**
13493 * xmlParseBalancedChunkMemory:
13494 * @doc: the document the chunk pertains to
13495 * @sax: the SAX handler bloc (possibly NULL)
13496 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13497 * @depth: Used for loop detection, use 0
13498 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13499 * @lst: the return value for the set of parsed nodes
13500 *
13501 * Parse a well-balanced chunk of an XML document
13502 * called by the parser
13503 * The allowed sequence for the Well Balanced Chunk is the one defined by
13504 * the content production in the XML grammar:
13505 *
13506 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13507 *
13508 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13509 * the parser error code otherwise
13510 */
13511
13512 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13513 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13514 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13515 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13516 depth, string, lst, 0 );
13517 }
13518 #endif /* LIBXML_SAX1_ENABLED */
13519
13520 /**
13521 * xmlParseBalancedChunkMemoryInternal:
13522 * @oldctxt: the existing parsing context
13523 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13524 * @user_data: the user data field for the parser context
13525 * @lst: the return value for the set of parsed nodes
13526 *
13527 *
13528 * Parse a well-balanced chunk of an XML document
13529 * called by the parser
13530 * The allowed sequence for the Well Balanced Chunk is the one defined by
13531 * the content production in the XML grammar:
13532 *
13533 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13534 *
13535 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13536 * error code otherwise
13537 *
13538 * In case recover is set to 1, the nodelist will not be empty even if
13539 * the parsed chunk is not well balanced.
13540 */
13541 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13542 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13543 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13544 xmlParserCtxtPtr ctxt;
13545 xmlDocPtr newDoc = NULL;
13546 xmlNodePtr newRoot;
13547 xmlSAXHandlerPtr oldsax = NULL;
13548 xmlNodePtr content = NULL;
13549 xmlNodePtr last = NULL;
13550 int size;
13551 xmlParserErrors ret = XML_ERR_OK;
13552 #ifdef SAX2
13553 int i;
13554 #endif
13555
13556 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13557 (oldctxt->depth > 1024)) {
13558 return(XML_ERR_ENTITY_LOOP);
13559 }
13560
13561
13562 if (lst != NULL)
13563 *lst = NULL;
13564 if (string == NULL)
13565 return(XML_ERR_INTERNAL_ERROR);
13566
13567 size = xmlStrlen(string);
13568
13569 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13570 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13571 if (user_data != NULL)
13572 ctxt->userData = user_data;
13573 else
13574 ctxt->userData = ctxt;
13575 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13576 ctxt->dict = oldctxt->dict;
13577 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13578 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13579 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13580
13581 #ifdef SAX2
13582 /* propagate namespaces down the entity */
13583 for (i = 0;i < oldctxt->nsNr;i += 2) {
13584 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13585 }
13586 #endif
13587
13588 oldsax = ctxt->sax;
13589 ctxt->sax = oldctxt->sax;
13590 xmlDetectSAX2(ctxt);
13591 ctxt->replaceEntities = oldctxt->replaceEntities;
13592 ctxt->options = oldctxt->options;
13593
13594 ctxt->_private = oldctxt->_private;
13595 if (oldctxt->myDoc == NULL) {
13596 newDoc = xmlNewDoc(BAD_CAST "1.0");
13597 if (newDoc == NULL) {
13598 ctxt->sax = oldsax;
13599 ctxt->dict = NULL;
13600 xmlFreeParserCtxt(ctxt);
13601 return(XML_ERR_INTERNAL_ERROR);
13602 }
13603 newDoc->properties = XML_DOC_INTERNAL;
13604 newDoc->dict = ctxt->dict;
13605 xmlDictReference(newDoc->dict);
13606 ctxt->myDoc = newDoc;
13607 } else {
13608 ctxt->myDoc = oldctxt->myDoc;
13609 content = ctxt->myDoc->children;
13610 last = ctxt->myDoc->last;
13611 }
13612 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13613 if (newRoot == NULL) {
13614 ctxt->sax = oldsax;
13615 ctxt->dict = NULL;
13616 xmlFreeParserCtxt(ctxt);
13617 if (newDoc != NULL) {
13618 xmlFreeDoc(newDoc);
13619 }
13620 return(XML_ERR_INTERNAL_ERROR);
13621 }
13622 ctxt->myDoc->children = NULL;
13623 ctxt->myDoc->last = NULL;
13624 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13625 nodePush(ctxt, ctxt->myDoc->children);
13626 ctxt->instate = XML_PARSER_CONTENT;
13627 ctxt->depth = oldctxt->depth + 1;
13628
13629 ctxt->validate = 0;
13630 ctxt->loadsubset = oldctxt->loadsubset;
13631 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13632 /*
13633 * ID/IDREF registration will be done in xmlValidateElement below
13634 */
13635 ctxt->loadsubset |= XML_SKIP_IDS;
13636 }
13637 ctxt->dictNames = oldctxt->dictNames;
13638 ctxt->attsDefault = oldctxt->attsDefault;
13639 ctxt->attsSpecial = oldctxt->attsSpecial;
13640
13641 xmlParseContent(ctxt);
13642 if ((RAW == '<') && (NXT(1) == '/')) {
13643 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13644 } else if (RAW != 0) {
13645 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13646 }
13647 if (ctxt->node != ctxt->myDoc->children) {
13648 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13649 }
13650
13651 if (!ctxt->wellFormed) {
13652 if (ctxt->errNo == 0)
13653 ret = XML_ERR_INTERNAL_ERROR;
13654 else
13655 ret = (xmlParserErrors)ctxt->errNo;
13656 } else {
13657 ret = XML_ERR_OK;
13658 }
13659
13660 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13661 xmlNodePtr cur;
13662
13663 /*
13664 * Return the newly created nodeset after unlinking it from
13665 * they pseudo parent.
13666 */
13667 cur = ctxt->myDoc->children->children;
13668 *lst = cur;
13669 while (cur != NULL) {
13670 #ifdef LIBXML_VALID_ENABLED
13671 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13672 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13673 (cur->type == XML_ELEMENT_NODE)) {
13674 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13675 oldctxt->myDoc, cur);
13676 }
13677 #endif /* LIBXML_VALID_ENABLED */
13678 cur->parent = NULL;
13679 cur = cur->next;
13680 }
13681 ctxt->myDoc->children->children = NULL;
13682 }
13683 if (ctxt->myDoc != NULL) {
13684 xmlFreeNode(ctxt->myDoc->children);
13685 ctxt->myDoc->children = content;
13686 ctxt->myDoc->last = last;
13687 }
13688
13689 /*
13690 * Record in the parent context the number of entities replacement
13691 * done when parsing that reference.
13692 */
13693 if (oldctxt != NULL)
13694 oldctxt->nbentities += ctxt->nbentities;
13695
13696 /*
13697 * Also record the last error if any
13698 */
13699 if (ctxt->lastError.code != XML_ERR_OK)
13700 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13701
13702 ctxt->sax = oldsax;
13703 ctxt->dict = NULL;
13704 ctxt->attsDefault = NULL;
13705 ctxt->attsSpecial = NULL;
13706 xmlFreeParserCtxt(ctxt);
13707 if (newDoc != NULL) {
13708 xmlFreeDoc(newDoc);
13709 }
13710
13711 return(ret);
13712 }
13713
13714 /**
13715 * xmlParseInNodeContext:
13716 * @node: the context node
13717 * @data: the input string
13718 * @datalen: the input string length in bytes
13719 * @options: a combination of xmlParserOption
13720 * @lst: the return value for the set of parsed nodes
13721 *
13722 * Parse a well-balanced chunk of an XML document
13723 * within the context (DTD, namespaces, etc ...) of the given node.
13724 *
13725 * The allowed sequence for the data is a Well Balanced Chunk defined by
13726 * the content production in the XML grammar:
13727 *
13728 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13729 *
13730 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13731 * error code otherwise
13732 */
13733 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13734 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13735 int options, xmlNodePtr *lst) {
13736 #ifdef SAX2
13737 xmlParserCtxtPtr ctxt;
13738 xmlDocPtr doc = NULL;
13739 xmlNodePtr fake, cur;
13740 int nsnr = 0;
13741
13742 xmlParserErrors ret = XML_ERR_OK;
13743
13744 /*
13745 * check all input parameters, grab the document
13746 */
13747 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13748 return(XML_ERR_INTERNAL_ERROR);
13749 switch (node->type) {
13750 case XML_ELEMENT_NODE:
13751 case XML_ATTRIBUTE_NODE:
13752 case XML_TEXT_NODE:
13753 case XML_CDATA_SECTION_NODE:
13754 case XML_ENTITY_REF_NODE:
13755 case XML_PI_NODE:
13756 case XML_COMMENT_NODE:
13757 case XML_DOCUMENT_NODE:
13758 case XML_HTML_DOCUMENT_NODE:
13759 break;
13760 default:
13761 return(XML_ERR_INTERNAL_ERROR);
13762
13763 }
13764 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13765 (node->type != XML_DOCUMENT_NODE) &&
13766 (node->type != XML_HTML_DOCUMENT_NODE))
13767 node = node->parent;
13768 if (node == NULL)
13769 return(XML_ERR_INTERNAL_ERROR);
13770 if (node->type == XML_ELEMENT_NODE)
13771 doc = node->doc;
13772 else
13773 doc = (xmlDocPtr) node;
13774 if (doc == NULL)
13775 return(XML_ERR_INTERNAL_ERROR);
13776
13777 /*
13778 * allocate a context and set-up everything not related to the
13779 * node position in the tree
13780 */
13781 if (doc->type == XML_DOCUMENT_NODE)
13782 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13783 #ifdef LIBXML_HTML_ENABLED
13784 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13785 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13786 /*
13787 * When parsing in context, it makes no sense to add implied
13788 * elements like html/body/etc...
13789 */
13790 options |= HTML_PARSE_NOIMPLIED;
13791 }
13792 #endif
13793 else
13794 return(XML_ERR_INTERNAL_ERROR);
13795
13796 if (ctxt == NULL)
13797 return(XML_ERR_NO_MEMORY);
13798
13799 /*
13800 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13801 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13802 * we must wait until the last moment to free the original one.
13803 */
13804 if (doc->dict != NULL) {
13805 if (ctxt->dict != NULL)
13806 xmlDictFree(ctxt->dict);
13807 ctxt->dict = doc->dict;
13808 } else
13809 options |= XML_PARSE_NODICT;
13810
13811 if (doc->encoding != NULL) {
13812 xmlCharEncodingHandlerPtr hdlr;
13813
13814 if (ctxt->encoding != NULL)
13815 xmlFree((xmlChar *) ctxt->encoding);
13816 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13817
13818 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13819 if (hdlr != NULL) {
13820 xmlSwitchToEncoding(ctxt, hdlr);
13821 } else {
13822 return(XML_ERR_UNSUPPORTED_ENCODING);
13823 }
13824 }
13825
13826 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13827 xmlDetectSAX2(ctxt);
13828 ctxt->myDoc = doc;
13829 /* parsing in context, i.e. as within existing content */
13830 ctxt->instate = XML_PARSER_CONTENT;
13831
13832 fake = xmlNewComment(NULL);
13833 if (fake == NULL) {
13834 xmlFreeParserCtxt(ctxt);
13835 return(XML_ERR_NO_MEMORY);
13836 }
13837 xmlAddChild(node, fake);
13838
13839 if (node->type == XML_ELEMENT_NODE) {
13840 nodePush(ctxt, node);
13841 /*
13842 * initialize the SAX2 namespaces stack
13843 */
13844 cur = node;
13845 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13846 xmlNsPtr ns = cur->nsDef;
13847 const xmlChar *iprefix, *ihref;
13848
13849 while (ns != NULL) {
13850 if (ctxt->dict) {
13851 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13852 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13853 } else {
13854 iprefix = ns->prefix;
13855 ihref = ns->href;
13856 }
13857
13858 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13859 nsPush(ctxt, iprefix, ihref);
13860 nsnr++;
13861 }
13862 ns = ns->next;
13863 }
13864 cur = cur->parent;
13865 }
13866 }
13867
13868 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13869 /*
13870 * ID/IDREF registration will be done in xmlValidateElement below
13871 */
13872 ctxt->loadsubset |= XML_SKIP_IDS;
13873 }
13874
13875 #ifdef LIBXML_HTML_ENABLED
13876 if (doc->type == XML_HTML_DOCUMENT_NODE)
13877 __htmlParseContent(ctxt);
13878 else
13879 #endif
13880 xmlParseContent(ctxt);
13881
13882 nsPop(ctxt, nsnr);
13883 if ((RAW == '<') && (NXT(1) == '/')) {
13884 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13885 } else if (RAW != 0) {
13886 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13887 }
13888 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13889 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13890 ctxt->wellFormed = 0;
13891 }
13892
13893 if (!ctxt->wellFormed) {
13894 if (ctxt->errNo == 0)
13895 ret = XML_ERR_INTERNAL_ERROR;
13896 else
13897 ret = (xmlParserErrors)ctxt->errNo;
13898 } else {
13899 ret = XML_ERR_OK;
13900 }
13901
13902 /*
13903 * Return the newly created nodeset after unlinking it from
13904 * the pseudo sibling.
13905 */
13906
13907 cur = fake->next;
13908 fake->next = NULL;
13909 node->last = fake;
13910
13911 if (cur != NULL) {
13912 cur->prev = NULL;
13913 }
13914
13915 *lst = cur;
13916
13917 while (cur != NULL) {
13918 cur->parent = NULL;
13919 cur = cur->next;
13920 }
13921
13922 xmlUnlinkNode(fake);
13923 xmlFreeNode(fake);
13924
13925
13926 if (ret != XML_ERR_OK) {
13927 xmlFreeNodeList(*lst);
13928 *lst = NULL;
13929 }
13930
13931 if (doc->dict != NULL)
13932 ctxt->dict = NULL;
13933 xmlFreeParserCtxt(ctxt);
13934
13935 return(ret);
13936 #else /* !SAX2 */
13937 return(XML_ERR_INTERNAL_ERROR);
13938 #endif
13939 }
13940
13941 #ifdef LIBXML_SAX1_ENABLED
13942 /**
13943 * xmlParseBalancedChunkMemoryRecover:
13944 * @doc: the document the chunk pertains to
13945 * @sax: the SAX handler bloc (possibly NULL)
13946 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13947 * @depth: Used for loop detection, use 0
13948 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13949 * @lst: the return value for the set of parsed nodes
13950 * @recover: return nodes even if the data is broken (use 0)
13951 *
13952 *
13953 * Parse a well-balanced chunk of an XML document
13954 * called by the parser
13955 * The allowed sequence for the Well Balanced Chunk is the one defined by
13956 * the content production in the XML grammar:
13957 *
13958 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13959 *
13960 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13961 * the parser error code otherwise
13962 *
13963 * In case recover is set to 1, the nodelist will not be empty even if
13964 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13965 * some extent.
13966 */
13967 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13968 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13969 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13970 int recover) {
13971 xmlParserCtxtPtr ctxt;
13972 xmlDocPtr newDoc;
13973 xmlSAXHandlerPtr oldsax = NULL;
13974 xmlNodePtr content, newRoot;
13975 int size;
13976 int ret = 0;
13977
13978 if (depth > 40) {
13979 return(XML_ERR_ENTITY_LOOP);
13980 }
13981
13982
13983 if (lst != NULL)
13984 *lst = NULL;
13985 if (string == NULL)
13986 return(-1);
13987
13988 size = xmlStrlen(string);
13989
13990 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13991 if (ctxt == NULL) return(-1);
13992 ctxt->userData = ctxt;
13993 if (sax != NULL) {
13994 oldsax = ctxt->sax;
13995 ctxt->sax = sax;
13996 if (user_data != NULL)
13997 ctxt->userData = user_data;
13998 }
13999 newDoc = xmlNewDoc(BAD_CAST "1.0");
14000 if (newDoc == NULL) {
14001 xmlFreeParserCtxt(ctxt);
14002 return(-1);
14003 }
14004 newDoc->properties = XML_DOC_INTERNAL;
14005 if ((doc != NULL) && (doc->dict != NULL)) {
14006 xmlDictFree(ctxt->dict);
14007 ctxt->dict = doc->dict;
14008 xmlDictReference(ctxt->dict);
14009 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14010 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14011 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14012 ctxt->dictNames = 1;
14013 } else {
14014 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
14015 }
14016 if (doc != NULL) {
14017 newDoc->intSubset = doc->intSubset;
14018 newDoc->extSubset = doc->extSubset;
14019 }
14020 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14021 if (newRoot == NULL) {
14022 if (sax != NULL)
14023 ctxt->sax = oldsax;
14024 xmlFreeParserCtxt(ctxt);
14025 newDoc->intSubset = NULL;
14026 newDoc->extSubset = NULL;
14027 xmlFreeDoc(newDoc);
14028 return(-1);
14029 }
14030 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14031 nodePush(ctxt, newRoot);
14032 if (doc == NULL) {
14033 ctxt->myDoc = newDoc;
14034 } else {
14035 ctxt->myDoc = newDoc;
14036 newDoc->children->doc = doc;
14037 /* Ensure that doc has XML spec namespace */
14038 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14039 newDoc->oldNs = doc->oldNs;
14040 }
14041 ctxt->instate = XML_PARSER_CONTENT;
14042 ctxt->depth = depth;
14043
14044 /*
14045 * Doing validity checking on chunk doesn't make sense
14046 */
14047 ctxt->validate = 0;
14048 ctxt->loadsubset = 0;
14049 xmlDetectSAX2(ctxt);
14050
14051 if ( doc != NULL ){
14052 content = doc->children;
14053 doc->children = NULL;
14054 xmlParseContent(ctxt);
14055 doc->children = content;
14056 }
14057 else {
14058 xmlParseContent(ctxt);
14059 }
14060 if ((RAW == '<') && (NXT(1) == '/')) {
14061 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14062 } else if (RAW != 0) {
14063 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
14064 }
14065 if (ctxt->node != newDoc->children) {
14066 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14067 }
14068
14069 if (!ctxt->wellFormed) {
14070 if (ctxt->errNo == 0)
14071 ret = 1;
14072 else
14073 ret = ctxt->errNo;
14074 } else {
14075 ret = 0;
14076 }
14077
14078 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14079 xmlNodePtr cur;
14080
14081 /*
14082 * Return the newly created nodeset after unlinking it from
14083 * they pseudo parent.
14084 */
14085 cur = newDoc->children->children;
14086 *lst = cur;
14087 while (cur != NULL) {
14088 xmlSetTreeDoc(cur, doc);
14089 cur->parent = NULL;
14090 cur = cur->next;
14091 }
14092 newDoc->children->children = NULL;
14093 }
14094
14095 if (sax != NULL)
14096 ctxt->sax = oldsax;
14097 xmlFreeParserCtxt(ctxt);
14098 newDoc->intSubset = NULL;
14099 newDoc->extSubset = NULL;
14100 newDoc->oldNs = NULL;
14101 xmlFreeDoc(newDoc);
14102
14103 return(ret);
14104 }
14105
14106 /**
14107 * xmlSAXParseEntity:
14108 * @sax: the SAX handler block
14109 * @filename: the filename
14110 *
14111 * parse an XML external entity out of context and build a tree.
14112 * It use the given SAX function block to handle the parsing callback.
14113 * If sax is NULL, fallback to the default DOM tree building routines.
14114 *
14115 * [78] extParsedEnt ::= TextDecl? content
14116 *
14117 * This correspond to a "Well Balanced" chunk
14118 *
14119 * Returns the resulting document tree
14120 */
14121
14122 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)14123 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14124 xmlDocPtr ret;
14125 xmlParserCtxtPtr ctxt;
14126
14127 ctxt = xmlCreateFileParserCtxt(filename);
14128 if (ctxt == NULL) {
14129 return(NULL);
14130 }
14131 if (sax != NULL) {
14132 if (ctxt->sax != NULL)
14133 xmlFree(ctxt->sax);
14134 ctxt->sax = sax;
14135 ctxt->userData = NULL;
14136 }
14137
14138 xmlParseExtParsedEnt(ctxt);
14139
14140 if (ctxt->wellFormed)
14141 ret = ctxt->myDoc;
14142 else {
14143 ret = NULL;
14144 xmlFreeDoc(ctxt->myDoc);
14145 ctxt->myDoc = NULL;
14146 }
14147 if (sax != NULL)
14148 ctxt->sax = NULL;
14149 xmlFreeParserCtxt(ctxt);
14150
14151 return(ret);
14152 }
14153
14154 /**
14155 * xmlParseEntity:
14156 * @filename: the filename
14157 *
14158 * parse an XML external entity out of context and build a tree.
14159 *
14160 * [78] extParsedEnt ::= TextDecl? content
14161 *
14162 * This correspond to a "Well Balanced" chunk
14163 *
14164 * Returns the resulting document tree
14165 */
14166
14167 xmlDocPtr
xmlParseEntity(const char * filename)14168 xmlParseEntity(const char *filename) {
14169 return(xmlSAXParseEntity(NULL, filename));
14170 }
14171 #endif /* LIBXML_SAX1_ENABLED */
14172
14173 /**
14174 * xmlCreateEntityParserCtxtInternal:
14175 * @URL: the entity URL
14176 * @ID: the entity PUBLIC ID
14177 * @base: a possible base for the target URI
14178 * @pctx: parser context used to set options on new context
14179 *
14180 * Create a parser context for an external entity
14181 * Automatic support for ZLIB/Compress compressed document is provided
14182 * by default if found at compile-time.
14183 *
14184 * Returns the new parser context or NULL
14185 */
14186 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14187 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14188 const xmlChar *base, xmlParserCtxtPtr pctx) {
14189 xmlParserCtxtPtr ctxt;
14190 xmlParserInputPtr inputStream;
14191 char *directory = NULL;
14192 xmlChar *uri;
14193
14194 ctxt = xmlNewParserCtxt();
14195 if (ctxt == NULL) {
14196 return(NULL);
14197 }
14198
14199 if (pctx != NULL) {
14200 ctxt->options = pctx->options;
14201 ctxt->_private = pctx->_private;
14202 }
14203
14204 uri = xmlBuildURI(URL, base);
14205
14206 if (uri == NULL) {
14207 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14208 if (inputStream == NULL) {
14209 xmlFreeParserCtxt(ctxt);
14210 return(NULL);
14211 }
14212
14213 inputPush(ctxt, inputStream);
14214
14215 if ((ctxt->directory == NULL) && (directory == NULL))
14216 directory = xmlParserGetDirectory((char *)URL);
14217 if ((ctxt->directory == NULL) && (directory != NULL))
14218 ctxt->directory = directory;
14219 } else {
14220 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14221 if (inputStream == NULL) {
14222 xmlFree(uri);
14223 xmlFreeParserCtxt(ctxt);
14224 return(NULL);
14225 }
14226
14227 inputPush(ctxt, inputStream);
14228
14229 if ((ctxt->directory == NULL) && (directory == NULL))
14230 directory = xmlParserGetDirectory((char *)uri);
14231 if ((ctxt->directory == NULL) && (directory != NULL))
14232 ctxt->directory = directory;
14233 xmlFree(uri);
14234 }
14235 return(ctxt);
14236 }
14237
14238 /**
14239 * xmlCreateEntityParserCtxt:
14240 * @URL: the entity URL
14241 * @ID: the entity PUBLIC ID
14242 * @base: a possible base for the target URI
14243 *
14244 * Create a parser context for an external entity
14245 * Automatic support for ZLIB/Compress compressed document is provided
14246 * by default if found at compile-time.
14247 *
14248 * Returns the new parser context or NULL
14249 */
14250 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14251 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14252 const xmlChar *base) {
14253 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14254
14255 }
14256
14257 /************************************************************************
14258 * *
14259 * Front ends when parsing from a file *
14260 * *
14261 ************************************************************************/
14262
14263 /**
14264 * xmlCreateURLParserCtxt:
14265 * @filename: the filename or URL
14266 * @options: a combination of xmlParserOption
14267 *
14268 * Create a parser context for a file or URL content.
14269 * Automatic support for ZLIB/Compress compressed document is provided
14270 * by default if found at compile-time and for file accesses
14271 *
14272 * Returns the new parser context or NULL
14273 */
14274 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14275 xmlCreateURLParserCtxt(const char *filename, int options)
14276 {
14277 xmlParserCtxtPtr ctxt;
14278 xmlParserInputPtr inputStream;
14279 char *directory = NULL;
14280
14281 ctxt = xmlNewParserCtxt();
14282 if (ctxt == NULL) {
14283 xmlErrMemory(NULL, "cannot allocate parser context");
14284 return(NULL);
14285 }
14286
14287 if (options)
14288 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14289 ctxt->linenumbers = 1;
14290
14291 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14292 if (inputStream == NULL) {
14293 xmlFreeParserCtxt(ctxt);
14294 return(NULL);
14295 }
14296
14297 inputPush(ctxt, inputStream);
14298 if ((ctxt->directory == NULL) && (directory == NULL))
14299 directory = xmlParserGetDirectory(filename);
14300 if ((ctxt->directory == NULL) && (directory != NULL))
14301 ctxt->directory = directory;
14302
14303 return(ctxt);
14304 }
14305
14306 /**
14307 * xmlCreateFileParserCtxt:
14308 * @filename: the filename
14309 *
14310 * Create a parser context for a file content.
14311 * Automatic support for ZLIB/Compress compressed document is provided
14312 * by default if found at compile-time.
14313 *
14314 * Returns the new parser context or NULL
14315 */
14316 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14317 xmlCreateFileParserCtxt(const char *filename)
14318 {
14319 return(xmlCreateURLParserCtxt(filename, 0));
14320 }
14321
14322 #ifdef LIBXML_SAX1_ENABLED
14323 /**
14324 * xmlSAXParseFileWithData:
14325 * @sax: the SAX handler block
14326 * @filename: the filename
14327 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14328 * documents
14329 * @data: the userdata
14330 *
14331 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14332 * compressed document is provided by default if found at compile-time.
14333 * It use the given SAX function block to handle the parsing callback.
14334 * If sax is NULL, fallback to the default DOM tree building routines.
14335 *
14336 * User data (void *) is stored within the parser context in the
14337 * context's _private member, so it is available nearly everywhere in libxml
14338 *
14339 * Returns the resulting document tree
14340 */
14341
14342 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14343 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14344 int recovery, void *data) {
14345 xmlDocPtr ret;
14346 xmlParserCtxtPtr ctxt;
14347
14348 xmlInitParser();
14349
14350 ctxt = xmlCreateFileParserCtxt(filename);
14351 if (ctxt == NULL) {
14352 return(NULL);
14353 }
14354 if (sax != NULL) {
14355 if (ctxt->sax != NULL)
14356 xmlFree(ctxt->sax);
14357 ctxt->sax = sax;
14358 }
14359 xmlDetectSAX2(ctxt);
14360 if (data!=NULL) {
14361 ctxt->_private = data;
14362 }
14363
14364 if (ctxt->directory == NULL)
14365 ctxt->directory = xmlParserGetDirectory(filename);
14366
14367 ctxt->recovery = recovery;
14368
14369 xmlParseDocument(ctxt);
14370
14371 if ((ctxt->wellFormed) || recovery) {
14372 ret = ctxt->myDoc;
14373 if (ret != NULL) {
14374 if (ctxt->input->buf->compressed > 0)
14375 ret->compression = 9;
14376 else
14377 ret->compression = ctxt->input->buf->compressed;
14378 }
14379 }
14380 else {
14381 ret = NULL;
14382 xmlFreeDoc(ctxt->myDoc);
14383 ctxt->myDoc = NULL;
14384 }
14385 if (sax != NULL)
14386 ctxt->sax = NULL;
14387 xmlFreeParserCtxt(ctxt);
14388
14389 return(ret);
14390 }
14391
14392 /**
14393 * xmlSAXParseFile:
14394 * @sax: the SAX handler block
14395 * @filename: the filename
14396 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14397 * documents
14398 *
14399 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14400 * compressed document is provided by default if found at compile-time.
14401 * It use the given SAX function block to handle the parsing callback.
14402 * If sax is NULL, fallback to the default DOM tree building routines.
14403 *
14404 * Returns the resulting document tree
14405 */
14406
14407 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14408 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14409 int recovery) {
14410 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14411 }
14412
14413 /**
14414 * xmlRecoverDoc:
14415 * @cur: a pointer to an array of xmlChar
14416 *
14417 * parse an XML in-memory document and build a tree.
14418 * In the case the document is not Well Formed, a attempt to build a
14419 * tree is tried anyway
14420 *
14421 * Returns the resulting document tree or NULL in case of failure
14422 */
14423
14424 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14425 xmlRecoverDoc(const xmlChar *cur) {
14426 return(xmlSAXParseDoc(NULL, cur, 1));
14427 }
14428
14429 /**
14430 * xmlParseFile:
14431 * @filename: the filename
14432 *
14433 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14434 * compressed document is provided by default if found at compile-time.
14435 *
14436 * Returns the resulting document tree if the file was wellformed,
14437 * NULL otherwise.
14438 */
14439
14440 xmlDocPtr
xmlParseFile(const char * filename)14441 xmlParseFile(const char *filename) {
14442 return(xmlSAXParseFile(NULL, filename, 0));
14443 }
14444
14445 /**
14446 * xmlRecoverFile:
14447 * @filename: the filename
14448 *
14449 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14450 * compressed document is provided by default if found at compile-time.
14451 * In the case the document is not Well Formed, it attempts to build
14452 * a tree anyway
14453 *
14454 * Returns the resulting document tree or NULL in case of failure
14455 */
14456
14457 xmlDocPtr
xmlRecoverFile(const char * filename)14458 xmlRecoverFile(const char *filename) {
14459 return(xmlSAXParseFile(NULL, filename, 1));
14460 }
14461
14462
14463 /**
14464 * xmlSetupParserForBuffer:
14465 * @ctxt: an XML parser context
14466 * @buffer: a xmlChar * buffer
14467 * @filename: a file name
14468 *
14469 * Setup the parser context to parse a new buffer; Clears any prior
14470 * contents from the parser context. The buffer parameter must not be
14471 * NULL, but the filename parameter can be
14472 */
14473 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14474 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14475 const char* filename)
14476 {
14477 xmlParserInputPtr input;
14478
14479 if ((ctxt == NULL) || (buffer == NULL))
14480 return;
14481
14482 input = xmlNewInputStream(ctxt);
14483 if (input == NULL) {
14484 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14485 xmlClearParserCtxt(ctxt);
14486 return;
14487 }
14488
14489 xmlClearParserCtxt(ctxt);
14490 if (filename != NULL)
14491 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14492 input->base = buffer;
14493 input->cur = buffer;
14494 input->end = &buffer[xmlStrlen(buffer)];
14495 inputPush(ctxt, input);
14496 }
14497
14498 /**
14499 * xmlSAXUserParseFile:
14500 * @sax: a SAX handler
14501 * @user_data: The user data returned on SAX callbacks
14502 * @filename: a file name
14503 *
14504 * parse an XML file and call the given SAX handler routines.
14505 * Automatic support for ZLIB/Compress compressed document is provided
14506 *
14507 * Returns 0 in case of success or a error number otherwise
14508 */
14509 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14510 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14511 const char *filename) {
14512 int ret = 0;
14513 xmlParserCtxtPtr ctxt;
14514
14515 ctxt = xmlCreateFileParserCtxt(filename);
14516 if (ctxt == NULL) return -1;
14517 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14518 xmlFree(ctxt->sax);
14519 ctxt->sax = sax;
14520 xmlDetectSAX2(ctxt);
14521
14522 if (user_data != NULL)
14523 ctxt->userData = user_data;
14524
14525 xmlParseDocument(ctxt);
14526
14527 if (ctxt->wellFormed)
14528 ret = 0;
14529 else {
14530 if (ctxt->errNo != 0)
14531 ret = ctxt->errNo;
14532 else
14533 ret = -1;
14534 }
14535 if (sax != NULL)
14536 ctxt->sax = NULL;
14537 if (ctxt->myDoc != NULL) {
14538 xmlFreeDoc(ctxt->myDoc);
14539 ctxt->myDoc = NULL;
14540 }
14541 xmlFreeParserCtxt(ctxt);
14542
14543 return ret;
14544 }
14545 #endif /* LIBXML_SAX1_ENABLED */
14546
14547 /************************************************************************
14548 * *
14549 * Front ends when parsing from memory *
14550 * *
14551 ************************************************************************/
14552
14553 /**
14554 * xmlCreateMemoryParserCtxt:
14555 * @buffer: a pointer to a char array
14556 * @size: the size of the array
14557 *
14558 * Create a parser context for an XML in-memory document.
14559 *
14560 * Returns the new parser context or NULL
14561 */
14562 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14563 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14564 xmlParserCtxtPtr ctxt;
14565 xmlParserInputPtr input;
14566 xmlParserInputBufferPtr buf;
14567
14568 if (buffer == NULL)
14569 return(NULL);
14570 if (size <= 0)
14571 return(NULL);
14572
14573 ctxt = xmlNewParserCtxt();
14574 if (ctxt == NULL)
14575 return(NULL);
14576
14577 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14578 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14579 if (buf == NULL) {
14580 xmlFreeParserCtxt(ctxt);
14581 return(NULL);
14582 }
14583
14584 input = xmlNewInputStream(ctxt);
14585 if (input == NULL) {
14586 xmlFreeParserInputBuffer(buf);
14587 xmlFreeParserCtxt(ctxt);
14588 return(NULL);
14589 }
14590
14591 input->filename = NULL;
14592 input->buf = buf;
14593 xmlBufResetInput(input->buf->buffer, input);
14594
14595 inputPush(ctxt, input);
14596 return(ctxt);
14597 }
14598
14599 #ifdef LIBXML_SAX1_ENABLED
14600 /**
14601 * xmlSAXParseMemoryWithData:
14602 * @sax: the SAX handler block
14603 * @buffer: an pointer to a char array
14604 * @size: the size of the array
14605 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14606 * documents
14607 * @data: the userdata
14608 *
14609 * parse an XML in-memory block and use the given SAX function block
14610 * to handle the parsing callback. If sax is NULL, fallback to the default
14611 * DOM tree building routines.
14612 *
14613 * User data (void *) is stored within the parser context in the
14614 * context's _private member, so it is available nearly everywhere in libxml
14615 *
14616 * Returns the resulting document tree
14617 */
14618
14619 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14620 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14621 int size, int recovery, void *data) {
14622 xmlDocPtr ret;
14623 xmlParserCtxtPtr ctxt;
14624
14625 xmlInitParser();
14626
14627 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14628 if (ctxt == NULL) return(NULL);
14629 if (sax != NULL) {
14630 if (ctxt->sax != NULL)
14631 xmlFree(ctxt->sax);
14632 ctxt->sax = sax;
14633 }
14634 xmlDetectSAX2(ctxt);
14635 if (data!=NULL) {
14636 ctxt->_private=data;
14637 }
14638
14639 ctxt->recovery = recovery;
14640
14641 xmlParseDocument(ctxt);
14642
14643 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14644 else {
14645 ret = NULL;
14646 xmlFreeDoc(ctxt->myDoc);
14647 ctxt->myDoc = NULL;
14648 }
14649 if (sax != NULL)
14650 ctxt->sax = NULL;
14651 xmlFreeParserCtxt(ctxt);
14652
14653 return(ret);
14654 }
14655
14656 /**
14657 * xmlSAXParseMemory:
14658 * @sax: the SAX handler block
14659 * @buffer: an pointer to a char array
14660 * @size: the size of the array
14661 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14662 * documents
14663 *
14664 * parse an XML in-memory block and use the given SAX function block
14665 * to handle the parsing callback. If sax is NULL, fallback to the default
14666 * DOM tree building routines.
14667 *
14668 * Returns the resulting document tree
14669 */
14670 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14671 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14672 int size, int recovery) {
14673 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14674 }
14675
14676 /**
14677 * xmlParseMemory:
14678 * @buffer: an pointer to a char array
14679 * @size: the size of the array
14680 *
14681 * parse an XML in-memory block and build a tree.
14682 *
14683 * Returns the resulting document tree
14684 */
14685
xmlParseMemory(const char * buffer,int size)14686 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14687 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14688 }
14689
14690 /**
14691 * xmlRecoverMemory:
14692 * @buffer: an pointer to a char array
14693 * @size: the size of the array
14694 *
14695 * parse an XML in-memory block and build a tree.
14696 * In the case the document is not Well Formed, an attempt to
14697 * build a tree is tried anyway
14698 *
14699 * Returns the resulting document tree or NULL in case of error
14700 */
14701
xmlRecoverMemory(const char * buffer,int size)14702 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14703 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14704 }
14705
14706 /**
14707 * xmlSAXUserParseMemory:
14708 * @sax: a SAX handler
14709 * @user_data: The user data returned on SAX callbacks
14710 * @buffer: an in-memory XML document input
14711 * @size: the length of the XML document in bytes
14712 *
14713 * A better SAX parsing routine.
14714 * parse an XML in-memory buffer and call the given SAX handler routines.
14715 *
14716 * Returns 0 in case of success or a error number otherwise
14717 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14718 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14719 const char *buffer, int size) {
14720 int ret = 0;
14721 xmlParserCtxtPtr ctxt;
14722
14723 xmlInitParser();
14724
14725 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14726 if (ctxt == NULL) return -1;
14727 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14728 xmlFree(ctxt->sax);
14729 ctxt->sax = sax;
14730 xmlDetectSAX2(ctxt);
14731
14732 if (user_data != NULL)
14733 ctxt->userData = user_data;
14734
14735 xmlParseDocument(ctxt);
14736
14737 if (ctxt->wellFormed)
14738 ret = 0;
14739 else {
14740 if (ctxt->errNo != 0)
14741 ret = ctxt->errNo;
14742 else
14743 ret = -1;
14744 }
14745 if (sax != NULL)
14746 ctxt->sax = NULL;
14747 if (ctxt->myDoc != NULL) {
14748 xmlFreeDoc(ctxt->myDoc);
14749 ctxt->myDoc = NULL;
14750 }
14751 xmlFreeParserCtxt(ctxt);
14752
14753 return ret;
14754 }
14755 #endif /* LIBXML_SAX1_ENABLED */
14756
14757 /**
14758 * xmlCreateDocParserCtxt:
14759 * @cur: a pointer to an array of xmlChar
14760 *
14761 * Creates a parser context for an XML in-memory document.
14762 *
14763 * Returns the new parser context or NULL
14764 */
14765 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14766 xmlCreateDocParserCtxt(const xmlChar *cur) {
14767 int len;
14768
14769 if (cur == NULL)
14770 return(NULL);
14771 len = xmlStrlen(cur);
14772 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14773 }
14774
14775 #ifdef LIBXML_SAX1_ENABLED
14776 /**
14777 * xmlSAXParseDoc:
14778 * @sax: the SAX handler block
14779 * @cur: a pointer to an array of xmlChar
14780 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14781 * documents
14782 *
14783 * parse an XML in-memory document and build a tree.
14784 * It use the given SAX function block to handle the parsing callback.
14785 * If sax is NULL, fallback to the default DOM tree building routines.
14786 *
14787 * Returns the resulting document tree
14788 */
14789
14790 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14791 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14792 xmlDocPtr ret;
14793 xmlParserCtxtPtr ctxt;
14794 xmlSAXHandlerPtr oldsax = NULL;
14795
14796 if (cur == NULL) return(NULL);
14797
14798
14799 ctxt = xmlCreateDocParserCtxt(cur);
14800 if (ctxt == NULL) return(NULL);
14801 if (sax != NULL) {
14802 oldsax = ctxt->sax;
14803 ctxt->sax = sax;
14804 ctxt->userData = NULL;
14805 }
14806 xmlDetectSAX2(ctxt);
14807
14808 xmlParseDocument(ctxt);
14809 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14810 else {
14811 ret = NULL;
14812 xmlFreeDoc(ctxt->myDoc);
14813 ctxt->myDoc = NULL;
14814 }
14815 if (sax != NULL)
14816 ctxt->sax = oldsax;
14817 xmlFreeParserCtxt(ctxt);
14818
14819 return(ret);
14820 }
14821
14822 /**
14823 * xmlParseDoc:
14824 * @cur: a pointer to an array of xmlChar
14825 *
14826 * parse an XML in-memory document and build a tree.
14827 *
14828 * Returns the resulting document tree
14829 */
14830
14831 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14832 xmlParseDoc(const xmlChar *cur) {
14833 return(xmlSAXParseDoc(NULL, cur, 0));
14834 }
14835 #endif /* LIBXML_SAX1_ENABLED */
14836
14837 #ifdef LIBXML_LEGACY_ENABLED
14838 /************************************************************************
14839 * *
14840 * Specific function to keep track of entities references *
14841 * and used by the XSLT debugger *
14842 * *
14843 ************************************************************************/
14844
14845 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14846
14847 /**
14848 * xmlAddEntityReference:
14849 * @ent : A valid entity
14850 * @firstNode : A valid first node for children of entity
14851 * @lastNode : A valid last node of children entity
14852 *
14853 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14854 */
14855 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14856 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14857 xmlNodePtr lastNode)
14858 {
14859 if (xmlEntityRefFunc != NULL) {
14860 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14861 }
14862 }
14863
14864
14865 /**
14866 * xmlSetEntityReferenceFunc:
14867 * @func: A valid function
14868 *
14869 * Set the function to call call back when a xml reference has been made
14870 */
14871 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14872 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14873 {
14874 xmlEntityRefFunc = func;
14875 }
14876 #endif /* LIBXML_LEGACY_ENABLED */
14877
14878 /************************************************************************
14879 * *
14880 * Miscellaneous *
14881 * *
14882 ************************************************************************/
14883
14884 #ifdef LIBXML_XPATH_ENABLED
14885 #include <libxml/xpath.h>
14886 #endif
14887
14888 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14889 static int xmlParserInitialized = 0;
14890
14891 /**
14892 * xmlInitParser:
14893 *
14894 * Initialization function for the XML parser.
14895 * This is not reentrant. Call once before processing in case of
14896 * use in multithreaded programs.
14897 */
14898
14899 void
xmlInitParser(void)14900 xmlInitParser(void) {
14901 if (xmlParserInitialized != 0)
14902 return;
14903
14904 #ifdef LIBXML_THREAD_ENABLED
14905 __xmlGlobalInitMutexLock();
14906 if (xmlParserInitialized == 0) {
14907 #endif
14908 xmlInitThreads();
14909 xmlInitGlobals();
14910 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14911 (xmlGenericError == NULL))
14912 initGenericErrorDefaultFunc(NULL);
14913 xmlInitMemory();
14914 xmlInitializeDict();
14915 xmlInitCharEncodingHandlers();
14916 xmlDefaultSAXHandlerInit();
14917 xmlRegisterDefaultInputCallbacks();
14918 #ifdef LIBXML_OUTPUT_ENABLED
14919 xmlRegisterDefaultOutputCallbacks();
14920 #endif /* LIBXML_OUTPUT_ENABLED */
14921 #ifdef LIBXML_HTML_ENABLED
14922 htmlInitAutoClose();
14923 htmlDefaultSAXHandlerInit();
14924 #endif
14925 #ifdef LIBXML_XPATH_ENABLED
14926 xmlXPathInit();
14927 #endif
14928 xmlParserInitialized = 1;
14929 #ifdef LIBXML_THREAD_ENABLED
14930 }
14931 __xmlGlobalInitMutexUnlock();
14932 #endif
14933 }
14934
14935 /**
14936 * xmlCleanupParser:
14937 *
14938 * This function name is somewhat misleading. It does not clean up
14939 * parser state, it cleans up memory allocated by the library itself.
14940 * It is a cleanup function for the XML library. It tries to reclaim all
14941 * related global memory allocated for the library processing.
14942 * It doesn't deallocate any document related memory. One should
14943 * call xmlCleanupParser() only when the process has finished using
14944 * the library and all XML/HTML documents built with it.
14945 * See also xmlInitParser() which has the opposite function of preparing
14946 * the library for operations.
14947 *
14948 * WARNING: if your application is multithreaded or has plugin support
14949 * calling this may crash the application if another thread or
14950 * a plugin is still using libxml2. It's sometimes very hard to
14951 * guess if libxml2 is in use in the application, some libraries
14952 * or plugins may use it without notice. In case of doubt abstain
14953 * from calling this function or do it just before calling exit()
14954 * to avoid leak reports from valgrind !
14955 */
14956
14957 void
xmlCleanupParser(void)14958 xmlCleanupParser(void) {
14959 if (!xmlParserInitialized)
14960 return;
14961
14962 xmlCleanupCharEncodingHandlers();
14963 #ifdef LIBXML_CATALOG_ENABLED
14964 xmlCatalogCleanup();
14965 #endif
14966 xmlDictCleanup();
14967 xmlCleanupInputCallbacks();
14968 #ifdef LIBXML_OUTPUT_ENABLED
14969 xmlCleanupOutputCallbacks();
14970 #endif
14971 #ifdef LIBXML_SCHEMAS_ENABLED
14972 xmlSchemaCleanupTypes();
14973 xmlRelaxNGCleanupTypes();
14974 #endif
14975 xmlResetLastError();
14976 xmlCleanupGlobals();
14977 xmlCleanupThreads(); /* must be last if called not from the main thread */
14978 xmlCleanupMemory();
14979 xmlParserInitialized = 0;
14980 }
14981
14982 /************************************************************************
14983 * *
14984 * New set (2.6.0) of simpler and more flexible APIs *
14985 * *
14986 ************************************************************************/
14987
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; when it is NULL any non-NULL @str is freed. @str is
 * evaluated more than once, so it must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and can safely be used as the body of an if/else.
 * The invocation must be followed by a semicolon.
 */
#define DICT_FREE(str) \
    do { \
        if ((str) && ((!dict) || \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
            xmlFree((char *)(str)); \
    } while (0)
14999
15000 /**
15001 * xmlCtxtReset:
15002 * @ctxt: an XML parser context
15003 *
15004 * Reset a parser context
15005 */
15006 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)15007 xmlCtxtReset(xmlParserCtxtPtr ctxt)
15008 {
15009 xmlParserInputPtr input;
15010 xmlDictPtr dict;
15011
15012 if (ctxt == NULL)
15013 return;
15014
15015 dict = ctxt->dict;
15016
15017 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15018 xmlFreeInputStream(input);
15019 }
15020 ctxt->inputNr = 0;
15021 ctxt->input = NULL;
15022
15023 ctxt->spaceNr = 0;
15024 if (ctxt->spaceTab != NULL) {
15025 ctxt->spaceTab[0] = -1;
15026 ctxt->space = &ctxt->spaceTab[0];
15027 } else {
15028 ctxt->space = NULL;
15029 }
15030
15031
15032 ctxt->nodeNr = 0;
15033 ctxt->node = NULL;
15034
15035 ctxt->nameNr = 0;
15036 ctxt->name = NULL;
15037
15038 DICT_FREE(ctxt->version);
15039 ctxt->version = NULL;
15040 DICT_FREE(ctxt->encoding);
15041 ctxt->encoding = NULL;
15042 DICT_FREE(ctxt->directory);
15043 ctxt->directory = NULL;
15044 DICT_FREE(ctxt->extSubURI);
15045 ctxt->extSubURI = NULL;
15046 DICT_FREE(ctxt->extSubSystem);
15047 ctxt->extSubSystem = NULL;
15048 if (ctxt->myDoc != NULL)
15049 xmlFreeDoc(ctxt->myDoc);
15050 ctxt->myDoc = NULL;
15051
15052 ctxt->standalone = -1;
15053 ctxt->hasExternalSubset = 0;
15054 ctxt->hasPErefs = 0;
15055 ctxt->html = 0;
15056 ctxt->external = 0;
15057 ctxt->instate = XML_PARSER_START;
15058 ctxt->token = 0;
15059
15060 ctxt->wellFormed = 1;
15061 ctxt->nsWellFormed = 1;
15062 ctxt->disableSAX = 0;
15063 ctxt->valid = 1;
15064 #if 0
15065 ctxt->vctxt.userData = ctxt;
15066 ctxt->vctxt.error = xmlParserValidityError;
15067 ctxt->vctxt.warning = xmlParserValidityWarning;
15068 #endif
15069 ctxt->record_info = 0;
15070 ctxt->nbChars = 0;
15071 ctxt->checkIndex = 0;
15072 ctxt->inSubset = 0;
15073 ctxt->errNo = XML_ERR_OK;
15074 ctxt->depth = 0;
15075 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15076 ctxt->catalogs = NULL;
15077 ctxt->nbentities = 0;
15078 ctxt->sizeentities = 0;
15079 ctxt->sizeentcopy = 0;
15080 xmlInitNodeInfoSeq(&ctxt->node_seq);
15081
15082 if (ctxt->attsDefault != NULL) {
15083 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15084 ctxt->attsDefault = NULL;
15085 }
15086 if (ctxt->attsSpecial != NULL) {
15087 xmlHashFree(ctxt->attsSpecial, NULL);
15088 ctxt->attsSpecial = NULL;
15089 }
15090
15091 #ifdef LIBXML_CATALOG_ENABLED
15092 if (ctxt->catalogs != NULL)
15093 xmlCatalogFreeLocal(ctxt->catalogs);
15094 #endif
15095 if (ctxt->lastError.code != XML_ERR_OK)
15096 xmlResetError(&ctxt->lastError);
15097 }
15098
15099 /**
15100 * xmlCtxtResetPush:
15101 * @ctxt: an XML parser context
15102 * @chunk: a pointer to an array of chars
15103 * @size: number of chars in the array
15104 * @filename: an optional file name or URI
15105 * @encoding: the document encoding, or NULL
15106 *
15107 * Reset a push parser context
15108 *
15109 * Returns 0 in case of success and 1 in case of error
15110 */
15111 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)15112 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15113 int size, const char *filename, const char *encoding)
15114 {
15115 xmlParserInputPtr inputStream;
15116 xmlParserInputBufferPtr buf;
15117 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15118
15119 if (ctxt == NULL)
15120 return(1);
15121
15122 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15123 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15124
15125 buf = xmlAllocParserInputBuffer(enc);
15126 if (buf == NULL)
15127 return(1);
15128
15129 if (ctxt == NULL) {
15130 xmlFreeParserInputBuffer(buf);
15131 return(1);
15132 }
15133
15134 xmlCtxtReset(ctxt);
15135
15136 if (ctxt->pushTab == NULL) {
15137 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15138 sizeof(xmlChar *));
15139 if (ctxt->pushTab == NULL) {
15140 xmlErrMemory(ctxt, NULL);
15141 xmlFreeParserInputBuffer(buf);
15142 return(1);
15143 }
15144 }
15145
15146 if (filename == NULL) {
15147 ctxt->directory = NULL;
15148 } else {
15149 ctxt->directory = xmlParserGetDirectory(filename);
15150 }
15151
15152 inputStream = xmlNewInputStream(ctxt);
15153 if (inputStream == NULL) {
15154 xmlFreeParserInputBuffer(buf);
15155 return(1);
15156 }
15157
15158 if (filename == NULL)
15159 inputStream->filename = NULL;
15160 else
15161 inputStream->filename = (char *)
15162 xmlCanonicPath((const xmlChar *) filename);
15163 inputStream->buf = buf;
15164 xmlBufResetInput(buf->buffer, inputStream);
15165
15166 inputPush(ctxt, inputStream);
15167
15168 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15169 (ctxt->input->buf != NULL)) {
15170 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15171 size_t cur = ctxt->input->cur - ctxt->input->base;
15172
15173 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15174
15175 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15176 #ifdef DEBUG_PUSH
15177 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15178 #endif
15179 }
15180
15181 if (encoding != NULL) {
15182 xmlCharEncodingHandlerPtr hdlr;
15183
15184 if (ctxt->encoding != NULL)
15185 xmlFree((xmlChar *) ctxt->encoding);
15186 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15187
15188 hdlr = xmlFindCharEncodingHandler(encoding);
15189 if (hdlr != NULL) {
15190 xmlSwitchToEncoding(ctxt, hdlr);
15191 } else {
15192 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15193 "Unsupported encoding %s\n", BAD_CAST encoding);
15194 }
15195 } else if (enc != XML_CHAR_ENCODING_NONE) {
15196 xmlSwitchEncoding(ctxt, enc);
15197 }
15198
15199 return(0);
15200 }
15201
15202
15203 /**
15204 * xmlCtxtUseOptionsInternal:
15205 * @ctxt: an XML parser context
15206 * @options: a combination of xmlParserOption
15207 * @encoding: the user provided encoding to use
15208 *
15209 * Applies the options to the parser context
15210 *
15211 * Returns 0 in case of success, the set of unknown or unimplemented options
15212 * in case of error.
15213 */
15214 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15216 {
15217 if (ctxt == NULL)
15218 return(-1);
15219 if (encoding != NULL) {
15220 if (ctxt->encoding != NULL)
15221 xmlFree((xmlChar *) ctxt->encoding);
15222 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15223 }
15224 if (options & XML_PARSE_RECOVER) {
15225 ctxt->recovery = 1;
15226 options -= XML_PARSE_RECOVER;
15227 ctxt->options |= XML_PARSE_RECOVER;
15228 } else
15229 ctxt->recovery = 0;
15230 if (options & XML_PARSE_DTDLOAD) {
15231 ctxt->loadsubset = XML_DETECT_IDS;
15232 options -= XML_PARSE_DTDLOAD;
15233 ctxt->options |= XML_PARSE_DTDLOAD;
15234 } else
15235 ctxt->loadsubset = 0;
15236 if (options & XML_PARSE_DTDATTR) {
15237 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15238 options -= XML_PARSE_DTDATTR;
15239 ctxt->options |= XML_PARSE_DTDATTR;
15240 }
15241 if (options & XML_PARSE_NOENT) {
15242 ctxt->replaceEntities = 1;
15243 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15244 options -= XML_PARSE_NOENT;
15245 ctxt->options |= XML_PARSE_NOENT;
15246 } else
15247 ctxt->replaceEntities = 0;
15248 if (options & XML_PARSE_PEDANTIC) {
15249 ctxt->pedantic = 1;
15250 options -= XML_PARSE_PEDANTIC;
15251 ctxt->options |= XML_PARSE_PEDANTIC;
15252 } else
15253 ctxt->pedantic = 0;
15254 if (options & XML_PARSE_NOBLANKS) {
15255 ctxt->keepBlanks = 0;
15256 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15257 options -= XML_PARSE_NOBLANKS;
15258 ctxt->options |= XML_PARSE_NOBLANKS;
15259 } else
15260 ctxt->keepBlanks = 1;
15261 if (options & XML_PARSE_DTDVALID) {
15262 ctxt->validate = 1;
15263 if (options & XML_PARSE_NOWARNING)
15264 ctxt->vctxt.warning = NULL;
15265 if (options & XML_PARSE_NOERROR)
15266 ctxt->vctxt.error = NULL;
15267 options -= XML_PARSE_DTDVALID;
15268 ctxt->options |= XML_PARSE_DTDVALID;
15269 } else
15270 ctxt->validate = 0;
15271 if (options & XML_PARSE_NOWARNING) {
15272 ctxt->sax->warning = NULL;
15273 options -= XML_PARSE_NOWARNING;
15274 }
15275 if (options & XML_PARSE_NOERROR) {
15276 ctxt->sax->error = NULL;
15277 ctxt->sax->fatalError = NULL;
15278 options -= XML_PARSE_NOERROR;
15279 }
15280 #ifdef LIBXML_SAX1_ENABLED
15281 if (options & XML_PARSE_SAX1) {
15282 ctxt->sax->startElement = xmlSAX2StartElement;
15283 ctxt->sax->endElement = xmlSAX2EndElement;
15284 ctxt->sax->startElementNs = NULL;
15285 ctxt->sax->endElementNs = NULL;
15286 ctxt->sax->initialized = 1;
15287 options -= XML_PARSE_SAX1;
15288 ctxt->options |= XML_PARSE_SAX1;
15289 }
15290 #endif /* LIBXML_SAX1_ENABLED */
15291 if (options & XML_PARSE_NODICT) {
15292 ctxt->dictNames = 0;
15293 options -= XML_PARSE_NODICT;
15294 ctxt->options |= XML_PARSE_NODICT;
15295 } else {
15296 ctxt->dictNames = 1;
15297 }
15298 if (options & XML_PARSE_NOCDATA) {
15299 ctxt->sax->cdataBlock = NULL;
15300 options -= XML_PARSE_NOCDATA;
15301 ctxt->options |= XML_PARSE_NOCDATA;
15302 }
15303 if (options & XML_PARSE_NSCLEAN) {
15304 ctxt->options |= XML_PARSE_NSCLEAN;
15305 options -= XML_PARSE_NSCLEAN;
15306 }
15307 if (options & XML_PARSE_NONET) {
15308 ctxt->options |= XML_PARSE_NONET;
15309 options -= XML_PARSE_NONET;
15310 }
15311 if (options & XML_PARSE_COMPACT) {
15312 ctxt->options |= XML_PARSE_COMPACT;
15313 options -= XML_PARSE_COMPACT;
15314 }
15315 if (options & XML_PARSE_OLD10) {
15316 ctxt->options |= XML_PARSE_OLD10;
15317 options -= XML_PARSE_OLD10;
15318 }
15319 if (options & XML_PARSE_NOBASEFIX) {
15320 ctxt->options |= XML_PARSE_NOBASEFIX;
15321 options -= XML_PARSE_NOBASEFIX;
15322 }
15323 if (options & XML_PARSE_HUGE) {
15324 ctxt->options |= XML_PARSE_HUGE;
15325 options -= XML_PARSE_HUGE;
15326 if (ctxt->dict != NULL)
15327 xmlDictSetLimit(ctxt->dict, 0);
15328 }
15329 if (options & XML_PARSE_OLDSAX) {
15330 ctxt->options |= XML_PARSE_OLDSAX;
15331 options -= XML_PARSE_OLDSAX;
15332 }
15333 if (options & XML_PARSE_IGNORE_ENC) {
15334 ctxt->options |= XML_PARSE_IGNORE_ENC;
15335 options -= XML_PARSE_IGNORE_ENC;
15336 }
15337 if (options & XML_PARSE_BIG_LINES) {
15338 ctxt->options |= XML_PARSE_BIG_LINES;
15339 options -= XML_PARSE_BIG_LINES;
15340 }
15341 ctxt->linenumbers = 1;
15342 return (options);
15343 }
15344
15345 /**
15346 * xmlCtxtUseOptions:
15347 * @ctxt: an XML parser context
15348 * @options: a combination of xmlParserOption
15349 *
15350 * Applies the options to the parser context
15351 *
15352 * Returns 0 in case of success, the set of unknown or unimplemented options
15353 * in case of error.
15354 */
15355 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15356 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15357 {
15358 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15359 }
15360
15361 /**
15362 * xmlDoRead:
15363 * @ctxt: an XML parser context
15364 * @URL: the base URL to use for the document
15365 * @encoding: the document encoding, or NULL
15366 * @options: a combination of xmlParserOption
15367 * @reuse: keep the context for reuse
15368 *
15369 * Common front-end for the xmlRead functions
15370 *
15371 * Returns the resulting document tree or NULL
15372 */
15373 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15374 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15375 int options, int reuse)
15376 {
15377 xmlDocPtr ret;
15378
15379 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15380 if (encoding != NULL) {
15381 xmlCharEncodingHandlerPtr hdlr;
15382
15383 hdlr = xmlFindCharEncodingHandler(encoding);
15384 if (hdlr != NULL)
15385 xmlSwitchToEncoding(ctxt, hdlr);
15386 }
15387 if ((URL != NULL) && (ctxt->input != NULL) &&
15388 (ctxt->input->filename == NULL))
15389 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15390 xmlParseDocument(ctxt);
15391 if ((ctxt->wellFormed) || ctxt->recovery)
15392 ret = ctxt->myDoc;
15393 else {
15394 ret = NULL;
15395 if (ctxt->myDoc != NULL) {
15396 xmlFreeDoc(ctxt->myDoc);
15397 }
15398 }
15399 ctxt->myDoc = NULL;
15400 if (!reuse) {
15401 xmlFreeParserCtxt(ctxt);
15402 }
15403
15404 return (ret);
15405 }
15406
15407 /**
15408 * xmlReadDoc:
15409 * @cur: a pointer to a zero terminated string
15410 * @URL: the base URL to use for the document
15411 * @encoding: the document encoding, or NULL
15412 * @options: a combination of xmlParserOption
15413 *
15414 * parse an XML in-memory document and build a tree.
15415 *
15416 * Returns the resulting document tree
15417 */
15418 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15419 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15420 {
15421 xmlParserCtxtPtr ctxt;
15422
15423 if (cur == NULL)
15424 return (NULL);
15425 xmlInitParser();
15426
15427 ctxt = xmlCreateDocParserCtxt(cur);
15428 if (ctxt == NULL)
15429 return (NULL);
15430 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15431 }
15432
15433 /**
15434 * xmlReadFile:
15435 * @filename: a file or URL
15436 * @encoding: the document encoding, or NULL
15437 * @options: a combination of xmlParserOption
15438 *
15439 * parse an XML file from the filesystem or the network.
15440 *
15441 * Returns the resulting document tree
15442 */
15443 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15444 xmlReadFile(const char *filename, const char *encoding, int options)
15445 {
15446 xmlParserCtxtPtr ctxt;
15447
15448 xmlInitParser();
15449 ctxt = xmlCreateURLParserCtxt(filename, options);
15450 if (ctxt == NULL)
15451 return (NULL);
15452 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15453 }
15454
15455 /**
15456 * xmlReadMemory:
15457 * @buffer: a pointer to a char array
15458 * @size: the size of the array
15459 * @URL: the base URL to use for the document
15460 * @encoding: the document encoding, or NULL
15461 * @options: a combination of xmlParserOption
15462 *
15463 * parse an XML in-memory document and build a tree.
15464 *
15465 * Returns the resulting document tree
15466 */
15467 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15468 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15469 {
15470 xmlParserCtxtPtr ctxt;
15471
15472 xmlInitParser();
15473 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15474 if (ctxt == NULL)
15475 return (NULL);
15476 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15477 }
15478
15479 /**
15480 * xmlReadFd:
15481 * @fd: an open file descriptor
15482 * @URL: the base URL to use for the document
15483 * @encoding: the document encoding, or NULL
15484 * @options: a combination of xmlParserOption
15485 *
15486 * parse an XML from a file descriptor and build a tree.
15487 * NOTE that the file descriptor will not be closed when the
15488 * reader is closed or reset.
15489 *
15490 * Returns the resulting document tree
15491 */
15492 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15493 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15494 {
15495 xmlParserCtxtPtr ctxt;
15496 xmlParserInputBufferPtr input;
15497 xmlParserInputPtr stream;
15498
15499 if (fd < 0)
15500 return (NULL);
15501 xmlInitParser();
15502
15503 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15504 if (input == NULL)
15505 return (NULL);
15506 input->closecallback = NULL;
15507 ctxt = xmlNewParserCtxt();
15508 if (ctxt == NULL) {
15509 xmlFreeParserInputBuffer(input);
15510 return (NULL);
15511 }
15512 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15513 if (stream == NULL) {
15514 xmlFreeParserInputBuffer(input);
15515 xmlFreeParserCtxt(ctxt);
15516 return (NULL);
15517 }
15518 inputPush(ctxt, stream);
15519 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15520 }
15521
15522 /**
15523 * xmlReadIO:
15524 * @ioread: an I/O read function
15525 * @ioclose: an I/O close function
15526 * @ioctx: an I/O handler
15527 * @URL: the base URL to use for the document
15528 * @encoding: the document encoding, or NULL
15529 * @options: a combination of xmlParserOption
15530 *
15531 * parse an XML document from I/O functions and source and build a tree.
15532 *
15533 * Returns the resulting document tree
15534 */
15535 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15536 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15537 void *ioctx, const char *URL, const char *encoding, int options)
15538 {
15539 xmlParserCtxtPtr ctxt;
15540 xmlParserInputBufferPtr input;
15541 xmlParserInputPtr stream;
15542
15543 if (ioread == NULL)
15544 return (NULL);
15545 xmlInitParser();
15546
15547 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15548 XML_CHAR_ENCODING_NONE);
15549 if (input == NULL) {
15550 if (ioclose != NULL)
15551 ioclose(ioctx);
15552 return (NULL);
15553 }
15554 ctxt = xmlNewParserCtxt();
15555 if (ctxt == NULL) {
15556 xmlFreeParserInputBuffer(input);
15557 return (NULL);
15558 }
15559 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15560 if (stream == NULL) {
15561 xmlFreeParserInputBuffer(input);
15562 xmlFreeParserCtxt(ctxt);
15563 return (NULL);
15564 }
15565 inputPush(ctxt, stream);
15566 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15567 }
15568
15569 /**
15570 * xmlCtxtReadDoc:
15571 * @ctxt: an XML parser context
15572 * @cur: a pointer to a zero terminated string
15573 * @URL: the base URL to use for the document
15574 * @encoding: the document encoding, or NULL
15575 * @options: a combination of xmlParserOption
15576 *
15577 * parse an XML in-memory document and build a tree.
15578 * This reuses the existing @ctxt parser context
15579 *
15580 * Returns the resulting document tree
15581 */
15582 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15583 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15584 const char *URL, const char *encoding, int options)
15585 {
15586 xmlParserInputPtr stream;
15587
15588 if (cur == NULL)
15589 return (NULL);
15590 if (ctxt == NULL)
15591 return (NULL);
15592 xmlInitParser();
15593
15594 xmlCtxtReset(ctxt);
15595
15596 stream = xmlNewStringInputStream(ctxt, cur);
15597 if (stream == NULL) {
15598 return (NULL);
15599 }
15600 inputPush(ctxt, stream);
15601 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15602 }
15603
15604 /**
15605 * xmlCtxtReadFile:
15606 * @ctxt: an XML parser context
15607 * @filename: a file or URL
15608 * @encoding: the document encoding, or NULL
15609 * @options: a combination of xmlParserOption
15610 *
15611 * parse an XML file from the filesystem or the network.
15612 * This reuses the existing @ctxt parser context
15613 *
15614 * Returns the resulting document tree
15615 */
15616 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15617 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15618 const char *encoding, int options)
15619 {
15620 xmlParserInputPtr stream;
15621
15622 if (filename == NULL)
15623 return (NULL);
15624 if (ctxt == NULL)
15625 return (NULL);
15626 xmlInitParser();
15627
15628 xmlCtxtReset(ctxt);
15629
15630 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15631 if (stream == NULL) {
15632 return (NULL);
15633 }
15634 inputPush(ctxt, stream);
15635 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15636 }
15637
15638 /**
15639 * xmlCtxtReadMemory:
15640 * @ctxt: an XML parser context
15641 * @buffer: a pointer to a char array
15642 * @size: the size of the array
15643 * @URL: the base URL to use for the document
15644 * @encoding: the document encoding, or NULL
15645 * @options: a combination of xmlParserOption
15646 *
15647 * parse an XML in-memory document and build a tree.
15648 * This reuses the existing @ctxt parser context
15649 *
15650 * Returns the resulting document tree
15651 */
15652 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15653 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15654 const char *URL, const char *encoding, int options)
15655 {
15656 xmlParserInputBufferPtr input;
15657 xmlParserInputPtr stream;
15658
15659 if (ctxt == NULL)
15660 return (NULL);
15661 if (buffer == NULL)
15662 return (NULL);
15663 xmlInitParser();
15664
15665 xmlCtxtReset(ctxt);
15666
15667 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15668 if (input == NULL) {
15669 return(NULL);
15670 }
15671
15672 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15673 if (stream == NULL) {
15674 xmlFreeParserInputBuffer(input);
15675 return(NULL);
15676 }
15677
15678 inputPush(ctxt, stream);
15679 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15680 }
15681
15682 /**
15683 * xmlCtxtReadFd:
15684 * @ctxt: an XML parser context
15685 * @fd: an open file descriptor
15686 * @URL: the base URL to use for the document
15687 * @encoding: the document encoding, or NULL
15688 * @options: a combination of xmlParserOption
15689 *
15690 * parse an XML from a file descriptor and build a tree.
15691 * This reuses the existing @ctxt parser context
15692 * NOTE that the file descriptor will not be closed when the
15693 * reader is closed or reset.
15694 *
15695 * Returns the resulting document tree
15696 */
15697 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15698 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15699 const char *URL, const char *encoding, int options)
15700 {
15701 xmlParserInputBufferPtr input;
15702 xmlParserInputPtr stream;
15703
15704 if (fd < 0)
15705 return (NULL);
15706 if (ctxt == NULL)
15707 return (NULL);
15708 xmlInitParser();
15709
15710 xmlCtxtReset(ctxt);
15711
15712
15713 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15714 if (input == NULL)
15715 return (NULL);
15716 input->closecallback = NULL;
15717 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15718 if (stream == NULL) {
15719 xmlFreeParserInputBuffer(input);
15720 return (NULL);
15721 }
15722 inputPush(ctxt, stream);
15723 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15724 }
15725
15726 /**
15727 * xmlCtxtReadIO:
15728 * @ctxt: an XML parser context
15729 * @ioread: an I/O read function
15730 * @ioclose: an I/O close function
15731 * @ioctx: an I/O handler
15732 * @URL: the base URL to use for the document
15733 * @encoding: the document encoding, or NULL
15734 * @options: a combination of xmlParserOption
15735 *
15736 * parse an XML document from I/O functions and source and build a tree.
15737 * This reuses the existing @ctxt parser context
15738 *
15739 * Returns the resulting document tree
15740 */
15741 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15742 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15743 xmlInputCloseCallback ioclose, void *ioctx,
15744 const char *URL,
15745 const char *encoding, int options)
15746 {
15747 xmlParserInputBufferPtr input;
15748 xmlParserInputPtr stream;
15749
15750 if (ioread == NULL)
15751 return (NULL);
15752 if (ctxt == NULL)
15753 return (NULL);
15754 xmlInitParser();
15755
15756 xmlCtxtReset(ctxt);
15757
15758 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15759 XML_CHAR_ENCODING_NONE);
15760 if (input == NULL) {
15761 if (ioclose != NULL)
15762 ioclose(ioctx);
15763 return (NULL);
15764 }
15765 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15766 if (stream == NULL) {
15767 xmlFreeParserInputBuffer(input);
15768 return (NULL);
15769 }
15770 inputPush(ctxt, stream);
15771 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15772 }
15773
15774 #define bottom_parser
15775 #include "elfgcchack.h"
15776