1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
/*
 * XML_DIR_SEP is a backslash when WIN32 is defined and __CYGWIN__ is not,
 * and a forward slash on every other build.
 * NOTE(review): presumably the directory separator used when handling file
 * paths; its uses are outside this part of the file - confirm.
 */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
41
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
/*
 * Forward declarations of file-local (static) helpers, so that they can be
 * called before their definitions appear; xmlFatalErr, for instance, is
 * called by xmlParserEntityCheck ahead of its own definition.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
99 /************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
/*
 * XML_PARSER_BIG_ENTITY: when xmlParserEntityCheck is given only an entity
 * size, sizes below this value are accepted without any further check.
 * NOTE(review): XML_PARSER_LOT_ENTITY is not referenced in this part of the
 * file; confirm where (and whether) it is still used.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
115
116 /*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
128 {
129 size_t consumed = 0;
130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
148 ++ctxt->depth;
149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
151 --ctxt->depth;
152 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
153 ent->content[0] = 0;
154 }
155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
204 size = ent->checked / 2;
205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
223 * strange we got no data for checking
224 */
225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
229 }
230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232 }
233
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * The initial value is 256. The variable has external linkage, so code
 * outside this file can read or change it.
 */
unsigned int xmlParserMaxDepth = 256;
243
244
245
/*
 * Parser-local constants.
 * NOTE(review): none of these four macros is used in this part of the file.
 * The names suggest two buffer sizes and the document name used in SAX
 * compatibility mode - confirm against their uses further down.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
261
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The list ends with a NULL entry. Both the strings and the table itself
 * are constant, so the table cannot be modified by accident.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271
272
/*
 * Forward declarations of file-local (static) helpers that are defined
 * later in this file. xmlAddEntityReference is only declared when
 * LIBXML_LEGACY_ENABLED is defined.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                                    const xmlChar *string, void *user_data,
                                    xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
299 /************************************************************************
300 * *
301 * Some factorized error routines *
302 * *
303 ************************************************************************/
304
305 /**
306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316 {
317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322
323 if (prefix == NULL)
324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
328 else
329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
339 }
340
341 /**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351 {
352 const char *errmsg;
353
354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
359 errmsg = "CharRef: invalid hexadecimal value";
360 break;
361 case XML_ERR_INVALID_DEC_CHARREF:
362 errmsg = "CharRef: invalid decimal value";
363 break;
364 case XML_ERR_INVALID_CHARREF:
365 errmsg = "CharRef: invalid value";
366 break;
367 case XML_ERR_INTERNAL_ERROR:
368 errmsg = "internal error";
369 break;
370 case XML_ERR_PEREF_AT_EOF:
371 errmsg = "PEReference at end of document";
372 break;
373 case XML_ERR_PEREF_IN_PROLOG:
374 errmsg = "PEReference in prolog";
375 break;
376 case XML_ERR_PEREF_IN_EPILOG:
377 errmsg = "PEReference in epilog";
378 break;
379 case XML_ERR_PEREF_NO_NAME:
380 errmsg = "PEReference: no name";
381 break;
382 case XML_ERR_PEREF_SEMICOL_MISSING:
383 errmsg = "PEReference: expecting ';'";
384 break;
385 case XML_ERR_ENTITY_LOOP:
386 errmsg = "Detected an entity reference loop";
387 break;
388 case XML_ERR_ENTITY_NOT_STARTED:
389 errmsg = "EntityValue: \" or ' expected";
390 break;
391 case XML_ERR_ENTITY_PE_INTERNAL:
392 errmsg = "PEReferences forbidden in internal subset";
393 break;
394 case XML_ERR_ENTITY_NOT_FINISHED:
395 errmsg = "EntityValue: \" or ' expected";
396 break;
397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
398 errmsg = "AttValue: \" or ' expected";
399 break;
400 case XML_ERR_LT_IN_ATTRIBUTE:
401 errmsg = "Unescaped '<' not allowed in attributes values";
402 break;
403 case XML_ERR_LITERAL_NOT_STARTED:
404 errmsg = "SystemLiteral \" or ' expected";
405 break;
406 case XML_ERR_LITERAL_NOT_FINISHED:
407 errmsg = "Unfinished System or Public ID \" or ' expected";
408 break;
409 case XML_ERR_MISPLACED_CDATA_END:
410 errmsg = "Sequence ']]>' not allowed in content";
411 break;
412 case XML_ERR_URI_REQUIRED:
413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
414 break;
415 case XML_ERR_PUBID_REQUIRED:
416 errmsg = "PUBLIC, the Public Identifier is missing";
417 break;
418 case XML_ERR_HYPHEN_IN_COMMENT:
419 errmsg = "Comment must not contain '--' (double-hyphen)";
420 break;
421 case XML_ERR_PI_NOT_STARTED:
422 errmsg = "xmlParsePI : no target name";
423 break;
424 case XML_ERR_RESERVED_XML_NAME:
425 errmsg = "Invalid PI name";
426 break;
427 case XML_ERR_NOTATION_NOT_STARTED:
428 errmsg = "NOTATION: Name expected here";
429 break;
430 case XML_ERR_NOTATION_NOT_FINISHED:
431 errmsg = "'>' required to close NOTATION declaration";
432 break;
433 case XML_ERR_VALUE_REQUIRED:
434 errmsg = "Entity value required";
435 break;
436 case XML_ERR_URI_FRAGMENT:
437 errmsg = "Fragment not allowed";
438 break;
439 case XML_ERR_ATTLIST_NOT_STARTED:
440 errmsg = "'(' required to start ATTLIST enumeration";
441 break;
442 case XML_ERR_NMTOKEN_REQUIRED:
443 errmsg = "NmToken expected in ATTLIST enumeration";
444 break;
445 case XML_ERR_ATTLIST_NOT_FINISHED:
446 errmsg = "')' required to finish ATTLIST enumeration";
447 break;
448 case XML_ERR_MIXED_NOT_STARTED:
449 errmsg = "MixedContentDecl : '|' or ')*' expected";
450 break;
451 case XML_ERR_PCDATA_REQUIRED:
452 errmsg = "MixedContentDecl : '#PCDATA' expected";
453 break;
454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
455 errmsg = "ContentDecl : Name or '(' expected";
456 break;
457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458 errmsg = "ContentDecl : ',' '|' or ')' expected";
459 break;
460 case XML_ERR_PEREF_IN_INT_SUBSET:
461 errmsg =
462 "PEReference: forbidden within markup decl in internal subset";
463 break;
464 case XML_ERR_GT_REQUIRED:
465 errmsg = "expected '>'";
466 break;
467 case XML_ERR_CONDSEC_INVALID:
468 errmsg = "XML conditional section '[' expected";
469 break;
470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471 errmsg = "Content error in the external subset";
472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
475 "conditional section INCLUDE or IGNORE keyword expected";
476 break;
477 case XML_ERR_CONDSEC_NOT_FINISHED:
478 errmsg = "XML conditional section not closed";
479 break;
480 case XML_ERR_XMLDECL_NOT_STARTED:
481 errmsg = "Text declaration '<?xml' required";
482 break;
483 case XML_ERR_XMLDECL_NOT_FINISHED:
484 errmsg = "parsing XML declaration: '?>' expected";
485 break;
486 case XML_ERR_EXT_ENTITY_STANDALONE:
487 errmsg = "external parsed entities cannot be standalone";
488 break;
489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490 errmsg = "EntityRef: expecting ';'";
491 break;
492 case XML_ERR_DOCTYPE_NOT_FINISHED:
493 errmsg = "DOCTYPE improperly terminated";
494 break;
495 case XML_ERR_LTSLASH_REQUIRED:
496 errmsg = "EndTag: '</' not found";
497 break;
498 case XML_ERR_EQUAL_REQUIRED:
499 errmsg = "expected '='";
500 break;
501 case XML_ERR_STRING_NOT_CLOSED:
502 errmsg = "String not closed expecting \" or '";
503 break;
504 case XML_ERR_STRING_NOT_STARTED:
505 errmsg = "String not started expecting ' or \"";
506 break;
507 case XML_ERR_ENCODING_NAME:
508 errmsg = "Invalid XML encoding name";
509 break;
510 case XML_ERR_STANDALONE_VALUE:
511 errmsg = "standalone accepts only 'yes' or 'no'";
512 break;
513 case XML_ERR_DOCUMENT_EMPTY:
514 errmsg = "Document is empty";
515 break;
516 case XML_ERR_DOCUMENT_END:
517 errmsg = "Extra content at the end of the document";
518 break;
519 case XML_ERR_NOT_WELL_BALANCED:
520 errmsg = "chunk is not well balanced";
521 break;
522 case XML_ERR_EXTRA_CONTENT:
523 errmsg = "extra content at the end of well balanced chunk";
524 break;
525 case XML_ERR_VERSION_MISSING:
526 errmsg = "Malformed declaration expecting version";
527 break;
528 case XML_ERR_NAME_TOO_LONG:
529 errmsg = "Name too long use XML_PARSE_HUGE option";
530 break;
531 #if 0
532 case:
533 errmsg = "";
534 break;
535 #endif
536 default:
537 errmsg = "Unregistered error message";
538 }
539 if (ctxt != NULL)
540 ctxt->errNo = error;
541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555 }
556
557 /**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
568 {
569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
572 if (ctxt != NULL)
573 ctxt->errNo = error;
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
581 }
582
583 /**
584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
593 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596 {
597 xmlStructuredErrorFunc schannel = NULL;
598
599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
604 schannel = ctxt->sax->serror;
605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
620 }
621
622 /**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
629 * Handle a validity error.
630 */
631 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, const xmlChar *str2)
634 {
635 xmlStructuredErrorFunc schannel = NULL;
636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
647 ctxt->vctxt.error, ctxt->vctxt.userData,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 ctxt->valid = 0;
653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
659 }
660 }
661
662 /**
663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
671 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673 const char *msg, int val)
674 {
675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
678 if (ctxt != NULL)
679 ctxt->errNo = error;
680 __xmlRaiseError(NULL, NULL, NULL,
681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
688 }
689
690 /**
691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
701 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703 const char *msg, const xmlChar *str1, int val,
704 const xmlChar *str2)
705 {
706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
709 if (ctxt != NULL)
710 ctxt->errNo = error;
711 __xmlRaiseError(NULL, NULL, NULL,
712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
720 }
721
722 /**
723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
731 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733 const char *msg, const xmlChar * val)
734 {
735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
738 if (ctxt != NULL)
739 ctxt->errNo = error;
740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
749 }
750
751 /**
752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
760 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763 {
764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
767 if (ctxt != NULL)
768 ctxt->errNo = error;
769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773 }
774
775 /**
776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
785 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
790 {
791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
794 if (ctxt != NULL)
795 ctxt->errNo = error;
796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
802 }
803
804 /**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
812 * Handle a namespace warning error
813 */
814 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819 {
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827 }
828
829 /************************************************************************
830 * *
831 * Library wide options *
832 * *
833 ************************************************************************/
834
835 /**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845 int
xmlHasFeature(xmlFeature feature)846 xmlHasFeature(xmlFeature feature)
847 {
848 switch (feature) {
849 case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
851 return(1);
852 #else
853 return(0);
854 #endif
855 case XML_WITH_TREE:
856 #ifdef LIBXML_TREE_ENABLED
857 return(1);
858 #else
859 return(0);
860 #endif
861 case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864 #else
865 return(0);
866 #endif
867 case XML_WITH_PUSH:
868 #ifdef LIBXML_PUSH_ENABLED
869 return(1);
870 #else
871 return(0);
872 #endif
873 case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
875 return(1);
876 #else
877 return(0);
878 #endif
879 case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882 #else
883 return(0);
884 #endif
885 case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
887 return(1);
888 #else
889 return(0);
890 #endif
891 case XML_WITH_SAX1:
892 #ifdef LIBXML_SAX1_ENABLED
893 return(1);
894 #else
895 return(0);
896 #endif
897 case XML_WITH_FTP:
898 #ifdef LIBXML_FTP_ENABLED
899 return(1);
900 #else
901 return(0);
902 #endif
903 case XML_WITH_HTTP:
904 #ifdef LIBXML_HTTP_ENABLED
905 return(1);
906 #else
907 return(0);
908 #endif
909 case XML_WITH_VALID:
910 #ifdef LIBXML_VALID_ENABLED
911 return(1);
912 #else
913 return(0);
914 #endif
915 case XML_WITH_HTML:
916 #ifdef LIBXML_HTML_ENABLED
917 return(1);
918 #else
919 return(0);
920 #endif
921 case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924 #else
925 return(0);
926 #endif
927 case XML_WITH_C14N:
928 #ifdef LIBXML_C14N_ENABLED
929 return(1);
930 #else
931 return(0);
932 #endif
933 case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936 #else
937 return(0);
938 #endif
939 case XML_WITH_XPATH:
940 #ifdef LIBXML_XPATH_ENABLED
941 return(1);
942 #else
943 return(0);
944 #endif
945 case XML_WITH_XPTR:
946 #ifdef LIBXML_XPTR_ENABLED
947 return(1);
948 #else
949 return(0);
950 #endif
951 case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954 #else
955 return(0);
956 #endif
957 case XML_WITH_ICONV:
958 #ifdef LIBXML_ICONV_ENABLED
959 return(1);
960 #else
961 return(0);
962 #endif
963 case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966 #else
967 return(0);
968 #endif
969 case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972 #else
973 return(0);
974 #endif
975 case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978 #else
979 return(0);
980 #endif
981 case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984 #else
985 return(0);
986 #endif
987 case XML_WITH_EXPR:
988 #ifdef LIBXML_EXPR_ENABLED
989 return(1);
990 #else
991 return(0);
992 #endif
993 case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996 #else
997 return(0);
998 #endif
999 case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002 #else
1003 return(0);
1004 #endif
1005 case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008 #else
1009 return(0);
1010 #endif
1011 case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014 #else
1015 return(0);
1016 #endif
1017 case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020 #else
1021 return(0);
1022 #endif
1023 case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026 #else
1027 return(0);
1028 #endif
1029 case XML_WITH_ZLIB:
1030 #ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032 #else
1033 return(0);
1034 #endif
1035 case XML_WITH_LZMA:
1036 #ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038 #else
1039 return(0);
1040 #endif
1041 case XML_WITH_ICU:
1042 #ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044 #else
1045 return(0);
1046 #endif
1047 default:
1048 break;
1049 }
1050 return(0);
1051 }
1052
1053 /************************************************************************
1054 * *
1055 * SAX2 defaulted attributes handling *
1056 * *
1057 ************************************************************************/
1058
1059 /**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072 #else
1073 ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
1075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
1081 xmlErrMemory(ctxt, NULL);
1082 }
1083 }
1084
/*
 * Record of the defaulted attributes declared for one element.
 * NOTE(review): values is declared with 5 entries while its comment lists
 * four kinds of data; presumably every attribute uses 5 consecutive slots
 * and the structure is allocated larger than sizeof(xmlDefAttrs) to hold
 * maxAttrs of them - confirm against the allocation code before changing
 * this layout.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs; /* number of defaulted attributes on that element */
    int maxAttrs; /* the size of the array */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
1092
1093 /**
1094 * xmlAttrNormalizeSpace:
1095 * @src: the source string
1096 * @dst: the target string
1097 *
1098 * Normalize the space in non CDATA attribute values:
1099 * If the attribute type is not CDATA, then the XML processor MUST further
1100 * process the normalized attribute value by discarding any leading and
1101 * trailing space (#x20) characters, and by replacing sequences of space
1102 * (#x20) characters by a single space (#x20) character.
1103 * Note that the size of dst need to be at least src, and if one doesn't need
1104 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1105 * passing src as dst is just fine.
1106 *
1107 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1108 * is needed.
1109 */
1110 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1111 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1112 {
1113 if ((src == NULL) || (dst == NULL))
1114 return(NULL);
1115
1116 while (*src == 0x20) src++;
1117 while (*src != 0) {
1118 if (*src == 0x20) {
1119 while (*src == 0x20) src++;
1120 if (*src != 0)
1121 *dst++ = 0x20;
1122 } else {
1123 *dst++ = *src++;
1124 }
1125 }
1126 *dst = 0;
1127 if (dst == src)
1128 return(NULL);
1129 return(dst);
1130 }
1131
1132 /**
1133 * xmlAttrNormalizeSpace2:
1134 * @src: the source string
1135 *
1136 * Normalize the space in non CDATA attribute values, a slightly more complex
1137 * front end to avoid allocation problems when running on attribute values
1138 * coming from the input.
1139 *
1140 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1141 * is needed.
1142 */
1143 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1144 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1145 {
1146 int i;
1147 int remove_head = 0;
1148 int need_realloc = 0;
1149 const xmlChar *cur;
1150
1151 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1152 return(NULL);
1153 i = *len;
1154 if (i <= 0)
1155 return(NULL);
1156
1157 cur = src;
1158 while (*cur == 0x20) {
1159 cur++;
1160 remove_head++;
1161 }
1162 while (*cur != 0) {
1163 if (*cur == 0x20) {
1164 cur++;
1165 if ((*cur == 0x20) || (*cur == 0)) {
1166 need_realloc = 1;
1167 break;
1168 }
1169 } else
1170 cur++;
1171 }
1172 if (need_realloc) {
1173 xmlChar *ret;
1174
1175 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1176 if (ret == NULL) {
1177 xmlErrMemory(ctxt, NULL);
1178 return(NULL);
1179 }
1180 xmlAttrNormalizeSpace(ret, ret);
1181 *len = (int) strlen((const char *)ret);
1182 return(ret);
1183 } else if (remove_head) {
1184 *len -= remove_head;
1185 memmove(src, src + remove_head, 1 + *len);
1186 return(src);
1187 }
1188 return(NULL);
1189 }
1190
1191 /**
1192 * xmlAddDefAttrs:
1193 * @ctxt: an XML parser context
1194 * @fullname: the element fullname
1195 * @fullattr: the attribute fullname
1196 * @value: the attribute value
1197 *
1198 * Add a defaulted attribute for an element
1199 */
1200 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1201 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1202 const xmlChar *fullname,
1203 const xmlChar *fullattr,
1204 const xmlChar *value) {
1205 xmlDefAttrsPtr defaults;
1206 int len;
1207 const xmlChar *name;
1208 const xmlChar *prefix;
1209
1210 /*
1211 * Allows to detect attribute redefinitions
1212 */
1213 if (ctxt->attsSpecial != NULL) {
1214 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1215 return;
1216 }
1217
1218 if (ctxt->attsDefault == NULL) {
1219 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1220 if (ctxt->attsDefault == NULL)
1221 goto mem_error;
1222 }
1223
1224 /*
1225 * split the element name into prefix:localname , the string found
1226 * are within the DTD and then not associated to namespace names.
1227 */
1228 name = xmlSplitQName3(fullname, &len);
1229 if (name == NULL) {
1230 name = xmlDictLookup(ctxt->dict, fullname, -1);
1231 prefix = NULL;
1232 } else {
1233 name = xmlDictLookup(ctxt->dict, name, -1);
1234 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1235 }
1236
1237 /*
1238 * make sure there is some storage
1239 */
1240 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1241 if (defaults == NULL) {
1242 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1243 (4 * 5) * sizeof(const xmlChar *));
1244 if (defaults == NULL)
1245 goto mem_error;
1246 defaults->nbAttrs = 0;
1247 defaults->maxAttrs = 4;
1248 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1249 defaults, NULL) < 0) {
1250 xmlFree(defaults);
1251 goto mem_error;
1252 }
1253 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1254 xmlDefAttrsPtr temp;
1255
1256 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1257 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1258 if (temp == NULL)
1259 goto mem_error;
1260 defaults = temp;
1261 defaults->maxAttrs *= 2;
1262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263 defaults, NULL) < 0) {
1264 xmlFree(defaults);
1265 goto mem_error;
1266 }
1267 }
1268
1269 /*
1270 * Split the element name into prefix:localname , the string found
1271 * are within the DTD and hen not associated to namespace names.
1272 */
1273 name = xmlSplitQName3(fullattr, &len);
1274 if (name == NULL) {
1275 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1276 prefix = NULL;
1277 } else {
1278 name = xmlDictLookup(ctxt->dict, name, -1);
1279 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1280 }
1281
1282 defaults->values[5 * defaults->nbAttrs] = name;
1283 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1284 /* intern the string and precompute the end */
1285 len = xmlStrlen(value);
1286 value = xmlDictLookup(ctxt->dict, value, len);
1287 defaults->values[5 * defaults->nbAttrs + 2] = value;
1288 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1289 if (ctxt->external)
1290 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1291 else
1292 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1293 defaults->nbAttrs++;
1294
1295 return;
1296
1297 mem_error:
1298 xmlErrMemory(ctxt, NULL);
1299 return;
1300 }
1301
1302 /**
1303 * xmlAddSpecialAttr:
1304 * @ctxt: an XML parser context
1305 * @fullname: the element fullname
1306 * @fullattr: the attribute fullname
1307 * @type: the attribute type
1308 *
1309 * Register this attribute type
1310 */
1311 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1312 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1313 const xmlChar *fullname,
1314 const xmlChar *fullattr,
1315 int type)
1316 {
1317 if (ctxt->attsSpecial == NULL) {
1318 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1319 if (ctxt->attsSpecial == NULL)
1320 goto mem_error;
1321 }
1322
1323 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1324 return;
1325
1326 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1327 (void *) (long) type);
1328 return;
1329
1330 mem_error:
1331 xmlErrMemory(ctxt, NULL);
1332 return;
1333 }
1334
1335 /**
1336 * xmlCleanSpecialAttrCallback:
1337 *
1338 * Removes CDATA attributes from the special attribute table
1339 */
1340 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1341 xmlCleanSpecialAttrCallback(void *payload, void *data,
1342 const xmlChar *fullname, const xmlChar *fullattr,
1343 const xmlChar *unused ATTRIBUTE_UNUSED) {
1344 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1345
1346 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1347 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1348 }
1349 }
1350
1351 /**
1352 * xmlCleanSpecialAttr:
1353 * @ctxt: an XML parser context
1354 *
1355 * Trim the list of attributes defined to remove all those of type
1356 * CDATA as they are not special. This call should be done when finishing
1357 * to parse the DTD and before starting to parse the document root.
1358 */
1359 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1360 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1361 {
1362 if (ctxt->attsSpecial == NULL)
1363 return;
1364
1365 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1366
1367 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1368 xmlHashFree(ctxt->attsSpecial, NULL);
1369 ctxt->attsSpecial = NULL;
1370 }
1371 return;
1372 }
1373
1374 /**
1375 * xmlCheckLanguageID:
1376 * @lang: pointer to the string value
1377 *
1378 * Checks that the value conforms to the LanguageID production:
1379 *
1380 * NOTE: this is somewhat deprecated, those productions were removed from
1381 * the XML Second edition.
1382 *
1383 * [33] LanguageID ::= Langcode ('-' Subcode)*
1384 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1385 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1386 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1387 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1388 * [38] Subcode ::= ([a-z] | [A-Z])+
1389 *
1390 * The current REC reference the sucessors of RFC 1766, currently 5646
1391 *
1392 * http://www.rfc-editor.org/rfc/rfc5646.txt
1393 * langtag = language
1394 * ["-" script]
1395 * ["-" region]
1396 * *("-" variant)
1397 * *("-" extension)
1398 * ["-" privateuse]
1399 * language = 2*3ALPHA ; shortest ISO 639 code
1400 * ["-" extlang] ; sometimes followed by
1401 * ; extended language subtags
1402 * / 4ALPHA ; or reserved for future use
1403 * / 5*8ALPHA ; or registered language subtag
1404 *
1405 * extlang = 3ALPHA ; selected ISO 639 codes
1406 * *2("-" 3ALPHA) ; permanently reserved
1407 *
1408 * script = 4ALPHA ; ISO 15924 code
1409 *
1410 * region = 2ALPHA ; ISO 3166-1 code
1411 * / 3DIGIT ; UN M.49 code
1412 *
1413 * variant = 5*8alphanum ; registered variants
1414 * / (DIGIT 3alphanum)
1415 *
1416 * extension = singleton 1*("-" (2*8alphanum))
1417 *
1418 * ; Single alphanumerics
1419 * ; "x" reserved for private use
1420 * singleton = DIGIT ; 0 - 9
1421 * / %x41-57 ; A - W
1422 * / %x59-5A ; Y - Z
1423 * / %x61-77 ; a - w
1424 * / %x79-7A ; y - z
1425 *
1426 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1427 * The parser below doesn't try to cope with extension or privateuse
1428 * that could be added but that's not interoperable anyway
1429 *
1430 * Returns 1 if correct 0 otherwise
1431 **/
1432 int
xmlCheckLanguageID(const xmlChar * lang)1433 xmlCheckLanguageID(const xmlChar * lang)
1434 {
1435 const xmlChar *cur = lang, *nxt;
1436
1437 if (cur == NULL)
1438 return (0);
1439 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1440 ((cur[0] == 'I') && (cur[1] == '-')) ||
1441 ((cur[0] == 'x') && (cur[1] == '-')) ||
1442 ((cur[0] == 'X') && (cur[1] == '-'))) {
1443 /*
1444 * Still allow IANA code and user code which were coming
1445 * from the previous version of the XML-1.0 specification
1446 * it's deprecated but we should not fail
1447 */
1448 cur += 2;
1449 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1450 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1451 cur++;
1452 return(cur[0] == 0);
1453 }
1454 nxt = cur;
1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1457 nxt++;
1458 if (nxt - cur >= 4) {
1459 /*
1460 * Reserved
1461 */
1462 if ((nxt - cur > 8) || (nxt[0] != 0))
1463 return(0);
1464 return(1);
1465 }
1466 if (nxt - cur < 2)
1467 return(0);
1468 /* we got an ISO 639 code */
1469 if (nxt[0] == 0)
1470 return(1);
1471 if (nxt[0] != '-')
1472 return(0);
1473
1474 nxt++;
1475 cur = nxt;
1476 /* now we can have extlang or script or region or variant */
1477 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1478 goto region_m49;
1479
1480 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1481 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1482 nxt++;
1483 if (nxt - cur == 4)
1484 goto script;
1485 if (nxt - cur == 2)
1486 goto region;
1487 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1488 goto variant;
1489 if (nxt - cur != 3)
1490 return(0);
1491 /* we parsed an extlang */
1492 if (nxt[0] == 0)
1493 return(1);
1494 if (nxt[0] != '-')
1495 return(0);
1496
1497 nxt++;
1498 cur = nxt;
1499 /* now we can have script or region or variant */
1500 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1501 goto region_m49;
1502
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505 nxt++;
1506 if (nxt - cur == 2)
1507 goto region;
1508 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1509 goto variant;
1510 if (nxt - cur != 4)
1511 return(0);
1512 /* we parsed a script */
1513 script:
1514 if (nxt[0] == 0)
1515 return(1);
1516 if (nxt[0] != '-')
1517 return(0);
1518
1519 nxt++;
1520 cur = nxt;
1521 /* now we can have region or variant */
1522 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1523 goto region_m49;
1524
1525 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1526 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1527 nxt++;
1528
1529 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1530 goto variant;
1531 if (nxt - cur != 2)
1532 return(0);
1533 /* we parsed a region */
1534 region:
1535 if (nxt[0] == 0)
1536 return(1);
1537 if (nxt[0] != '-')
1538 return(0);
1539
1540 nxt++;
1541 cur = nxt;
1542 /* now we can just have a variant */
1543 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1544 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1545 nxt++;
1546
1547 if ((nxt - cur < 5) || (nxt - cur > 8))
1548 return(0);
1549
1550 /* we parsed a variant */
1551 variant:
1552 if (nxt[0] == 0)
1553 return(1);
1554 if (nxt[0] != '-')
1555 return(0);
1556 /* extensions and private use subtags not checked */
1557 return (1);
1558
1559 region_m49:
1560 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1561 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1562 nxt += 3;
1563 goto region;
1564 }
1565 return(0);
1566 }
1567
1568 /************************************************************************
1569 * *
1570 * Parser stacks related functions and macros *
1571 * *
1572 ************************************************************************/
1573
1574 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1575 const xmlChar ** str);
1576
1577 #ifdef SAX2
1578 /**
1579 * nsPush:
1580 * @ctxt: an XML parser context
1581 * @prefix: the namespace prefix or NULL
1582 * @URL: the namespace name
1583 *
1584 * Pushes a new parser namespace on top of the ns stack
1585 *
1586 * Returns -1 in case of error, -2 if the namespace should be discarded
1587 * and the index in the stack otherwise.
1588 */
1589 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1590 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1591 {
1592 if (ctxt->options & XML_PARSE_NSCLEAN) {
1593 int i;
1594 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1595 if (ctxt->nsTab[i] == prefix) {
1596 /* in scope */
1597 if (ctxt->nsTab[i + 1] == URL)
1598 return(-2);
1599 /* out of scope keep it */
1600 break;
1601 }
1602 }
1603 }
1604 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1605 ctxt->nsMax = 10;
1606 ctxt->nsNr = 0;
1607 ctxt->nsTab = (const xmlChar **)
1608 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1609 if (ctxt->nsTab == NULL) {
1610 xmlErrMemory(ctxt, NULL);
1611 ctxt->nsMax = 0;
1612 return (-1);
1613 }
1614 } else if (ctxt->nsNr >= ctxt->nsMax) {
1615 const xmlChar ** tmp;
1616 ctxt->nsMax *= 2;
1617 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1618 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1619 if (tmp == NULL) {
1620 xmlErrMemory(ctxt, NULL);
1621 ctxt->nsMax /= 2;
1622 return (-1);
1623 }
1624 ctxt->nsTab = tmp;
1625 }
1626 ctxt->nsTab[ctxt->nsNr++] = prefix;
1627 ctxt->nsTab[ctxt->nsNr++] = URL;
1628 return (ctxt->nsNr);
1629 }
1630 /**
1631 * nsPop:
1632 * @ctxt: an XML parser context
1633 * @nr: the number to pop
1634 *
1635 * Pops the top @nr parser prefix/namespace from the ns stack
1636 *
1637 * Returns the number of namespaces removed
1638 */
1639 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1640 nsPop(xmlParserCtxtPtr ctxt, int nr)
1641 {
1642 int i;
1643
1644 if (ctxt->nsTab == NULL) return(0);
1645 if (ctxt->nsNr < nr) {
1646 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1647 nr = ctxt->nsNr;
1648 }
1649 if (ctxt->nsNr <= 0)
1650 return (0);
1651
1652 for (i = 0;i < nr;i++) {
1653 ctxt->nsNr--;
1654 ctxt->nsTab[ctxt->nsNr] = NULL;
1655 }
1656 return(nr);
1657 }
1658 #endif
1659
1660 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1661 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1662 const xmlChar **atts;
1663 int *attallocs;
1664 int maxatts;
1665
1666 if (ctxt->atts == NULL) {
1667 maxatts = 55; /* allow for 10 attrs by default */
1668 atts = (const xmlChar **)
1669 xmlMalloc(maxatts * sizeof(xmlChar *));
1670 if (atts == NULL) goto mem_error;
1671 ctxt->atts = atts;
1672 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1673 if (attallocs == NULL) goto mem_error;
1674 ctxt->attallocs = attallocs;
1675 ctxt->maxatts = maxatts;
1676 } else if (nr + 5 > ctxt->maxatts) {
1677 maxatts = (nr + 5) * 2;
1678 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1679 maxatts * sizeof(const xmlChar *));
1680 if (atts == NULL) goto mem_error;
1681 ctxt->atts = atts;
1682 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1683 (maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
1686 ctxt->maxatts = maxatts;
1687 }
1688 return(ctxt->maxatts);
1689 mem_error:
1690 xmlErrMemory(ctxt, NULL);
1691 return(-1);
1692 }
1693
1694 /**
1695 * inputPush:
1696 * @ctxt: an XML parser context
1697 * @value: the parser input
1698 *
1699 * Pushes a new parser input on top of the input stack
1700 *
1701 * Returns -1 in case of error, the index in the stack otherwise
1702 */
1703 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1704 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1705 {
1706 if ((ctxt == NULL) || (value == NULL))
1707 return(-1);
1708 if (ctxt->inputNr >= ctxt->inputMax) {
1709 ctxt->inputMax *= 2;
1710 ctxt->inputTab =
1711 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712 ctxt->inputMax *
1713 sizeof(ctxt->inputTab[0]));
1714 if (ctxt->inputTab == NULL) {
1715 xmlErrMemory(ctxt, NULL);
1716 xmlFreeInputStream(value);
1717 ctxt->inputMax /= 2;
1718 value = NULL;
1719 return (-1);
1720 }
1721 }
1722 ctxt->inputTab[ctxt->inputNr] = value;
1723 ctxt->input = value;
1724 return (ctxt->inputNr++);
1725 }
1726 /**
1727 * inputPop:
1728 * @ctxt: an XML parser context
1729 *
1730 * Pops the top parser input from the input stack
1731 *
1732 * Returns the input just removed
1733 */
1734 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1735 inputPop(xmlParserCtxtPtr ctxt)
1736 {
1737 xmlParserInputPtr ret;
1738
1739 if (ctxt == NULL)
1740 return(NULL);
1741 if (ctxt->inputNr <= 0)
1742 return (NULL);
1743 ctxt->inputNr--;
1744 if (ctxt->inputNr > 0)
1745 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1746 else
1747 ctxt->input = NULL;
1748 ret = ctxt->inputTab[ctxt->inputNr];
1749 ctxt->inputTab[ctxt->inputNr] = NULL;
1750 return (ret);
1751 }
1752 /**
1753 * nodePush:
1754 * @ctxt: an XML parser context
1755 * @value: the element node
1756 *
1757 * Pushes a new element node on top of the node stack
1758 *
1759 * Returns -1 in case of error, the index in the stack otherwise
1760 */
1761 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1762 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763 {
1764 if (ctxt == NULL) return(0);
1765 if (ctxt->nodeNr >= ctxt->nodeMax) {
1766 xmlNodePtr *tmp;
1767
1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769 ctxt->nodeMax * 2 *
1770 sizeof(ctxt->nodeTab[0]));
1771 if (tmp == NULL) {
1772 xmlErrMemory(ctxt, NULL);
1773 return (-1);
1774 }
1775 ctxt->nodeTab = tmp;
1776 ctxt->nodeMax *= 2;
1777 }
1778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782 xmlParserMaxDepth);
1783 xmlHaltParser(ctxt);
1784 return(-1);
1785 }
1786 ctxt->nodeTab[ctxt->nodeNr] = value;
1787 ctxt->node = value;
1788 return (ctxt->nodeNr++);
1789 }
1790
1791 /**
1792 * nodePop:
1793 * @ctxt: an XML parser context
1794 *
1795 * Pops the top element node from the node stack
1796 *
1797 * Returns the node just removed
1798 */
1799 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1800 nodePop(xmlParserCtxtPtr ctxt)
1801 {
1802 xmlNodePtr ret;
1803
1804 if (ctxt == NULL) return(NULL);
1805 if (ctxt->nodeNr <= 0)
1806 return (NULL);
1807 ctxt->nodeNr--;
1808 if (ctxt->nodeNr > 0)
1809 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1810 else
1811 ctxt->node = NULL;
1812 ret = ctxt->nodeTab[ctxt->nodeNr];
1813 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1814 return (ret);
1815 }
1816
1817 #ifdef LIBXML_PUSH_ENABLED
1818 /**
1819 * nameNsPush:
1820 * @ctxt: an XML parser context
1821 * @value: the element name
1822 * @prefix: the element prefix
1823 * @URI: the element namespace name
1824 *
1825 * Pushes a new element name/prefix/URL on top of the name stack
1826 *
1827 * Returns -1 in case of error, the index in the stack otherwise
1828 */
1829 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1830 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1831 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1832 {
1833 if (ctxt->nameNr >= ctxt->nameMax) {
1834 const xmlChar * *tmp;
1835 void **tmp2;
1836 ctxt->nameMax *= 2;
1837 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1838 ctxt->nameMax *
1839 sizeof(ctxt->nameTab[0]));
1840 if (tmp == NULL) {
1841 ctxt->nameMax /= 2;
1842 goto mem_error;
1843 }
1844 ctxt->nameTab = tmp;
1845 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1846 ctxt->nameMax * 3 *
1847 sizeof(ctxt->pushTab[0]));
1848 if (tmp2 == NULL) {
1849 ctxt->nameMax /= 2;
1850 goto mem_error;
1851 }
1852 ctxt->pushTab = tmp2;
1853 }
1854 ctxt->nameTab[ctxt->nameNr] = value;
1855 ctxt->name = value;
1856 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1857 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1858 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1859 return (ctxt->nameNr++);
1860 mem_error:
1861 xmlErrMemory(ctxt, NULL);
1862 return (-1);
1863 }
1864 /**
1865 * nameNsPop:
1866 * @ctxt: an XML parser context
1867 *
1868 * Pops the top element/prefix/URI name from the name stack
1869 *
1870 * Returns the name just removed
1871 */
1872 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1873 nameNsPop(xmlParserCtxtPtr ctxt)
1874 {
1875 const xmlChar *ret;
1876
1877 if (ctxt->nameNr <= 0)
1878 return (NULL);
1879 ctxt->nameNr--;
1880 if (ctxt->nameNr > 0)
1881 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1882 else
1883 ctxt->name = NULL;
1884 ret = ctxt->nameTab[ctxt->nameNr];
1885 ctxt->nameTab[ctxt->nameNr] = NULL;
1886 return (ret);
1887 }
1888 #endif /* LIBXML_PUSH_ENABLED */
1889
1890 /**
1891 * namePush:
1892 * @ctxt: an XML parser context
1893 * @value: the element name
1894 *
1895 * Pushes a new element name on top of the name stack
1896 *
1897 * Returns -1 in case of error, the index in the stack otherwise
1898 */
1899 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1900 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1901 {
1902 if (ctxt == NULL) return (-1);
1903
1904 if (ctxt->nameNr >= ctxt->nameMax) {
1905 const xmlChar * *tmp;
1906 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1907 ctxt->nameMax * 2 *
1908 sizeof(ctxt->nameTab[0]));
1909 if (tmp == NULL) {
1910 goto mem_error;
1911 }
1912 ctxt->nameTab = tmp;
1913 ctxt->nameMax *= 2;
1914 }
1915 ctxt->nameTab[ctxt->nameNr] = value;
1916 ctxt->name = value;
1917 return (ctxt->nameNr++);
1918 mem_error:
1919 xmlErrMemory(ctxt, NULL);
1920 return (-1);
1921 }
1922 /**
1923 * namePop:
1924 * @ctxt: an XML parser context
1925 *
1926 * Pops the top element name from the name stack
1927 *
1928 * Returns the name just removed
1929 */
1930 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1931 namePop(xmlParserCtxtPtr ctxt)
1932 {
1933 const xmlChar *ret;
1934
1935 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1936 return (NULL);
1937 ctxt->nameNr--;
1938 if (ctxt->nameNr > 0)
1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1940 else
1941 ctxt->name = NULL;
1942 ret = ctxt->nameTab[ctxt->nameNr];
1943 ctxt->nameTab[ctxt->nameNr] = NULL;
1944 return (ret);
1945 }
1946
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and makes ctxt->space point to
 * it. The stack is doubled in size when full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

	ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
	                           ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    ctxt->spaceMax /=2;
	    return(-1);
	}
	ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
1965
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * value below it, or to the first slot of the stack when it becomes
 * empty, and the slot just released is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int idx;

    if (ctxt->spaceNr <= 0)
        return(0);

    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? idx - 1 : 0];
    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    return(popped);
}
1978
1979 /*
1980 * Macros for accessing the content. Those should be used only by the parser,
1981 * and not exported.
1982 *
1983 * Dirty macros, i.e. one often need to make assumption on the context to
1984 * use them
1985 *
1986 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1987 * To be used with extreme caution since operations consuming
1988 * characters may move the input buffer to a different location !
1989 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1990 * This should be used internally by the parser
1991 * only to compare to ASCII values otherwise it would break when
1992 * running with UTF-8 encoding.
1993 * RAW same as CUR but in the input buffer, bypass any token
1994 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1997 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1998 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
2001 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2002 *
2003 * NEXT Skip to the next character, this does the proper decoding
2004 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2005 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2006 * CUR_CHAR(l) returns the current unicode character (int), set l
2007 * to the number of xmlChars used for the encoding [0-5].
2008 * CUR_SCHAR same but operate on a string instead of the context
2009 * COPY_BUF copy the current unicode char to the target buffer, increment
2010 * the index
2011 * GROW, SHRINK handling of input buffers
2012 */
2013
/*
 * Direct accessors to the current input of the parser context. They all
 * expect a variable named ctxt to be in scope where they are expanded.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * c1 ... cn. The bytes are compared in order and the comparison stops at
 * the first mismatch. The arguments are parenthesized so that the cast
 * applies to the whole of s, whatever expression is passed for it.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2037
/*
 * SKIP(val): move the current input val xmlChar forward, counting them
 * in ctxt->nbChars and in the column of the input; the line number is
 * left alone. Afterwards a '%' at the new position is handed over to
 * xmlParserHandlePEReference(), and if the input stands on a 0 and cannot
 * be grown it is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChar one at a time so
 * that a newline increments the line number and resets the column.
 * val is evaluated on each iteration and should be free of side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2060
2061 #define SHRINK if ((ctxt->progressive == 0) && \
2062 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2063 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2064 xmlSHRINK (ctxt);
2065
xmlSHRINK(xmlParserCtxtPtr ctxt)2066 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2067 xmlParserInputShrink(ctxt->input);
2068 if ((*ctxt->input->cur == 0) &&
2069 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2070 xmlPopInput(ctxt);
2071 }
2072
2073 #define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2075 xmlGROW (ctxt);
2076
xmlGROW(xmlParserCtxtPtr ctxt)2077 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2083 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2084 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2086 xmlHaltParser(ctxt);
2087 return;
2088 }
2089 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2090 if ((ctxt->input->cur > ctxt->input->end) ||
2091 (ctxt->input->cur < ctxt->input->base)) {
2092 xmlHaltParser(ctxt);
2093 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2094 return;
2095 }
2096 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2097 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2098 xmlPopInput(ctxt);
2099 }
2100
/* skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting one column and one
 * character, and try to grow the input if this leads to a 0.
 * This expands to a brace enclosed block, not to an expression.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character which is l xmlChar long,
 * updating the line and column, then hand over a '%' found at the new
 * position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the input, resp. of the string s; l gets its size */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i
 * and advance i: a single xmlChar when l is 1, the multi-byte encoding
 * written by xmlCopyCharMultiByte() otherwise.
 * This expands to an if/else statement without the final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2127
2128 /**
2129 * xmlSkipBlankChars:
2130 * @ctxt: the XML parser context
2131 *
2132 * skip all blanks character found at that point in the input streams.
2133 * It pops up finished entities in the process if allowable at that point.
2134 *
2135 * Returns the number of space chars skipped
2136 */
2137
2138 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2139 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2140 int res = 0;
2141
2142 /*
2143 * It's Okay to use CUR/NEXT here since all the blanks are on
2144 * the ASCII range.
2145 */
2146 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2147 const xmlChar *cur;
2148 /*
2149 * if we are in the document content, go really fast
2150 */
2151 cur = ctxt->input->cur;
2152 while (IS_BLANK_CH(*cur)) {
2153 if (*cur == '\n') {
2154 ctxt->input->line++; ctxt->input->col = 1;
2155 } else {
2156 ctxt->input->col++;
2157 }
2158 cur++;
2159 res++;
2160 if (*cur == 0) {
2161 ctxt->input->cur = cur;
2162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2163 cur = ctxt->input->cur;
2164 }
2165 }
2166 ctxt->input->cur = cur;
2167 } else {
2168 int cur;
2169 do {
2170 cur = CUR;
2171 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2172 (ctxt->instate != XML_PARSER_EOF))) {
2173 NEXT;
2174 cur = CUR;
2175 res++;
2176 }
2177 while ((cur == 0) && (ctxt->inputNr > 1) &&
2178 (ctxt->instate != XML_PARSER_COMMENT)) {
2179 xmlPopInput(ctxt);
2180 cur = CUR;
2181 }
2182 /*
2183 * Need to handle support of entities branching here
2184 */
2185 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2186 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2187 (ctxt->instate != XML_PARSER_EOF));
2188 }
2189 return(res);
2190 }
2191
2192 /************************************************************************
2193 * *
2194 * Commodity functions to handle entities *
2195 * *
2196 ************************************************************************/
2197
2198 /**
2199 * xmlPopInput:
2200 * @ctxt: an XML parser context
2201 *
2202 * xmlPopInput: the current input pointed by ctxt->input came to an end
2203 * pop it and return the next char.
2204 *
2205 * Returns the current xmlChar in the parser context
2206 */
2207 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2208 xmlPopInput(xmlParserCtxtPtr ctxt) {
2209 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2210 if (xmlParserDebugEntities)
2211 xmlGenericError(xmlGenericErrorContext,
2212 "Popping input %d\n", ctxt->inputNr);
2213 xmlFreeInputStream(inputPop(ctxt));
2214 if ((*ctxt->input->cur == 0) &&
2215 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2216 return(xmlPopInput(ctxt));
2217 return(CUR);
2218 }
2219
2220 /**
2221 * xmlPushInput:
2222 * @ctxt: an XML parser context
2223 * @input: an XML parser input fragment (entity, XML fragment ...).
2224 *
2225 * xmlPushInput: switch to a new input stream which is stacked on top
2226 * of the previous one(s).
2227 * Returns -1 in case of error or the index in the input stack
2228 */
2229 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2230 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2231 int ret;
2232 if (input == NULL) return(-1);
2233
2234 if (xmlParserDebugEntities) {
2235 if ((ctxt->input != NULL) && (ctxt->input->filename))
2236 xmlGenericError(xmlGenericErrorContext,
2237 "%s(%d): ", ctxt->input->filename,
2238 ctxt->input->line);
2239 xmlGenericError(xmlGenericErrorContext,
2240 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2241 }
2242 ret = inputPush(ctxt, input);
2243 if (ctxt->instate == XML_PARSER_EOF)
2244 return(-1);
2245 GROW;
2246 return(ret);
2247 }
2248
2249 /**
2250 * xmlParseCharRef:
2251 * @ctxt: an XML parser context
2252 *
2253 * parse Reference declarations
2254 *
2255 * [66] CharRef ::= '&#' [0-9]+ ';' |
2256 * '&#x' [0-9a-fA-F]+ ';'
2257 *
2258 * [ WFC: Legal Character ]
2259 * Characters referred to using character references must match the
2260 * production for Char.
2261 *
2262 * Returns the value parsed (as an int), 0 in case of error
2263 */
2264 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2265 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2266 unsigned int val = 0;
2267 int count = 0;
2268 unsigned int outofrange = 0;
2269
2270 /*
2271 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2272 */
2273 if ((RAW == '&') && (NXT(1) == '#') &&
2274 (NXT(2) == 'x')) {
2275 SKIP(3);
2276 GROW;
2277 while (RAW != ';') { /* loop blocked by count */
2278 if (count++ > 20) {
2279 count = 0;
2280 GROW;
2281 if (ctxt->instate == XML_PARSER_EOF)
2282 return(0);
2283 }
2284 if ((RAW >= '0') && (RAW <= '9'))
2285 val = val * 16 + (CUR - '0');
2286 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2287 val = val * 16 + (CUR - 'a') + 10;
2288 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2289 val = val * 16 + (CUR - 'A') + 10;
2290 else {
2291 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2292 val = 0;
2293 break;
2294 }
2295 if (val > 0x10FFFF)
2296 outofrange = val;
2297
2298 NEXT;
2299 count++;
2300 }
2301 if (RAW == ';') {
2302 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2303 ctxt->input->col++;
2304 ctxt->nbChars ++;
2305 ctxt->input->cur++;
2306 }
2307 } else if ((RAW == '&') && (NXT(1) == '#')) {
2308 SKIP(2);
2309 GROW;
2310 while (RAW != ';') { /* loop blocked by count */
2311 if (count++ > 20) {
2312 count = 0;
2313 GROW;
2314 if (ctxt->instate == XML_PARSER_EOF)
2315 return(0);
2316 }
2317 if ((RAW >= '0') && (RAW <= '9'))
2318 val = val * 10 + (CUR - '0');
2319 else {
2320 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2321 val = 0;
2322 break;
2323 }
2324 if (val > 0x10FFFF)
2325 outofrange = val;
2326
2327 NEXT;
2328 count++;
2329 }
2330 if (RAW == ';') {
2331 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2332 ctxt->input->col++;
2333 ctxt->nbChars ++;
2334 ctxt->input->cur++;
2335 }
2336 } else {
2337 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2338 }
2339
2340 /*
2341 * [ WFC: Legal Character ]
2342 * Characters referred to using character references must match the
2343 * production for Char.
2344 */
2345 if ((IS_CHAR(val) && (outofrange == 0))) {
2346 return(val);
2347 } else {
2348 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2349 "xmlParseCharRef: invalid xmlChar value %d\n",
2350 val);
2351 }
2352 return(0);
2353 }
2354
2355 /**
2356 * xmlParseStringCharRef:
2357 * @ctxt: an XML parser context
2358 * @str: a pointer to an index in the string
2359 *
2360 * parse Reference declarations, variant parsing from a string rather
2361 * than an an input flow.
2362 *
2363 * [66] CharRef ::= '&#' [0-9]+ ';' |
2364 * '&#x' [0-9a-fA-F]+ ';'
2365 *
2366 * [ WFC: Legal Character ]
2367 * Characters referred to using character references must match the
2368 * production for Char.
2369 *
2370 * Returns the value parsed (as an int), 0 in case of error, str will be
2371 * updated to the current value of the index
2372 */
2373 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2374 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2375 const xmlChar *ptr;
2376 xmlChar cur;
2377 unsigned int val = 0;
2378 unsigned int outofrange = 0;
2379
2380 if ((str == NULL) || (*str == NULL)) return(0);
2381 ptr = *str;
2382 cur = *ptr;
2383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2384 ptr += 3;
2385 cur = *ptr;
2386 while (cur != ';') { /* Non input consuming loop */
2387 if ((cur >= '0') && (cur <= '9'))
2388 val = val * 16 + (cur - '0');
2389 else if ((cur >= 'a') && (cur <= 'f'))
2390 val = val * 16 + (cur - 'a') + 10;
2391 else if ((cur >= 'A') && (cur <= 'F'))
2392 val = val * 16 + (cur - 'A') + 10;
2393 else {
2394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2395 val = 0;
2396 break;
2397 }
2398 if (val > 0x10FFFF)
2399 outofrange = val;
2400
2401 ptr++;
2402 cur = *ptr;
2403 }
2404 if (cur == ';')
2405 ptr++;
2406 } else if ((cur == '&') && (ptr[1] == '#')){
2407 ptr += 2;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loops */
2410 if ((cur >= '0') && (cur <= '9'))
2411 val = val * 10 + (cur - '0');
2412 else {
2413 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2414 val = 0;
2415 break;
2416 }
2417 if (val > 0x10FFFF)
2418 outofrange = val;
2419
2420 ptr++;
2421 cur = *ptr;
2422 }
2423 if (cur == ';')
2424 ptr++;
2425 } else {
2426 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2427 return(0);
2428 }
2429 *str = ptr;
2430
2431 /*
2432 * [ WFC: Legal Character ]
2433 * Characters referred to using character references must match the
2434 * production for Char.
2435 */
2436 if ((IS_CHAR(val) && (outofrange == 0))) {
2437 return(val);
2438 } else {
2439 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2440 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2441 val);
2442 }
2443 return(0);
2444 }
2445
2446 /**
2447 * xmlNewBlanksWrapperInputStream:
2448 * @ctxt: an XML parser context
2449 * @entity: an Entity pointer
2450 *
2451 * Create a new input stream for wrapping
2452 * blanks around a PEReference
2453 *
2454 * Returns the new input stream or NULL
2455 */
2456
deallocblankswrapper(xmlChar * str)2457 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2458
2459 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2460 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2461 xmlParserInputPtr input;
2462 xmlChar *buffer;
2463 size_t length;
2464 if (entity == NULL) {
2465 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2466 "xmlNewBlanksWrapperInputStream entity\n");
2467 return(NULL);
2468 }
2469 if (xmlParserDebugEntities)
2470 xmlGenericError(xmlGenericErrorContext,
2471 "new blanks wrapper for entity: %s\n", entity->name);
2472 input = xmlNewInputStream(ctxt);
2473 if (input == NULL) {
2474 return(NULL);
2475 }
2476 length = xmlStrlen(entity->name) + 5;
2477 buffer = xmlMallocAtomic(length);
2478 if (buffer == NULL) {
2479 xmlErrMemory(ctxt, NULL);
2480 xmlFree(input);
2481 return(NULL);
2482 }
2483 buffer [0] = ' ';
2484 buffer [1] = '%';
2485 buffer [length-3] = ';';
2486 buffer [length-2] = ' ';
2487 buffer [length-1] = 0;
2488 memcpy(buffer + 2, entity->name, length - 5);
2489 input->free = deallocblankswrapper;
2490 input->base = buffer;
2491 input->cur = buffer;
2492 input->length = length;
2493 input->end = &buffer[length];
2494 return(input);
2495 }
2496
2497 /**
2498 * xmlParserHandlePEReference:
2499 * @ctxt: the parser context
2500 *
2501 * [69] PEReference ::= '%' Name ';'
2502 *
2503 * [ WFC: No Recursion ]
2504 * A parsed entity must not contain a recursive
2505 * reference to itself, either directly or indirectly.
2506 *
2507 * [ WFC: Entity Declared ]
2508 * In a document without any DTD, a document with only an internal DTD
2509 * subset which contains no parameter entity references, or a document
2510 * with "standalone='yes'", ... ... The declaration of a parameter
2511 * entity must precede any reference to it...
2512 *
2513 * [ VC: Entity Declared ]
2514 * In a document with an external subset or external parameter entities
2515 * with "standalone='no'", ... ... The declaration of a parameter entity
2516 * must precede any reference to it...
2517 *
2518 * [ WFC: In DTD ]
2519 * Parameter-entity references may only appear in the DTD.
2520 * NOTE: misleading but this is handled.
2521 *
2522 * A PEReference may have been detected in the current input stream
2523 * the handling is done accordingly to
2524 * http://www.w3.org/TR/REC-xml#entproc
2525 * i.e.
2526 * - Included in literal in entity values
2527 * - Included as Parameter Entity reference within DTDs
2528 */
2529 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2530 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2531 const xmlChar *name;
2532 xmlEntityPtr entity = NULL;
2533 xmlParserInputPtr input;
2534
2535 if (RAW != '%') return;
2536 switch(ctxt->instate) {
2537 case XML_PARSER_CDATA_SECTION:
2538 return;
2539 case XML_PARSER_COMMENT:
2540 return;
2541 case XML_PARSER_START_TAG:
2542 return;
2543 case XML_PARSER_END_TAG:
2544 return;
2545 case XML_PARSER_EOF:
2546 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2547 return;
2548 case XML_PARSER_PROLOG:
2549 case XML_PARSER_START:
2550 case XML_PARSER_MISC:
2551 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2552 return;
2553 case XML_PARSER_ENTITY_DECL:
2554 case XML_PARSER_CONTENT:
2555 case XML_PARSER_ATTRIBUTE_VALUE:
2556 case XML_PARSER_PI:
2557 case XML_PARSER_SYSTEM_LITERAL:
2558 case XML_PARSER_PUBLIC_LITERAL:
2559 /* we just ignore it there */
2560 return;
2561 case XML_PARSER_EPILOG:
2562 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2563 return;
2564 case XML_PARSER_ENTITY_VALUE:
2565 /*
2566 * NOTE: in the case of entity values, we don't do the
2567 * substitution here since we need the literal
2568 * entity value to be able to save the internal
2569 * subset of the document.
2570 * This will be handled by xmlStringDecodeEntities
2571 */
2572 return;
2573 case XML_PARSER_DTD:
2574 /*
2575 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2576 * In the internal DTD subset, parameter-entity references
2577 * can occur only where markup declarations can occur, not
2578 * within markup declarations.
2579 * In that case this is handled in xmlParseMarkupDecl
2580 */
2581 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2582 return;
2583 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2584 return;
2585 break;
2586 case XML_PARSER_IGNORE:
2587 return;
2588 }
2589
2590 NEXT;
2591 name = xmlParseName(ctxt);
2592 if (xmlParserDebugEntities)
2593 xmlGenericError(xmlGenericErrorContext,
2594 "PEReference: %s\n", name);
2595 if (name == NULL) {
2596 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2597 } else {
2598 if (RAW == ';') {
2599 NEXT;
2600 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2601 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2602 if (ctxt->instate == XML_PARSER_EOF)
2603 return;
2604 if (entity == NULL) {
2605
2606 /*
2607 * [ WFC: Entity Declared ]
2608 * In a document without any DTD, a document with only an
2609 * internal DTD subset which contains no parameter entity
2610 * references, or a document with "standalone='yes'", ...
2611 * ... The declaration of a parameter entity must precede
2612 * any reference to it...
2613 */
2614 if ((ctxt->standalone == 1) ||
2615 ((ctxt->hasExternalSubset == 0) &&
2616 (ctxt->hasPErefs == 0))) {
2617 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2618 "PEReference: %%%s; not found\n", name);
2619 } else {
2620 /*
2621 * [ VC: Entity Declared ]
2622 * In a document with an external subset or external
2623 * parameter entities with "standalone='no'", ...
2624 * ... The declaration of a parameter entity must precede
2625 * any reference to it...
2626 */
2627 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2628 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2629 "PEReference: %%%s; not found\n",
2630 name, NULL);
2631 } else
2632 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2633 "PEReference: %%%s; not found\n",
2634 name, NULL);
2635 ctxt->valid = 0;
2636 }
2637 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2638 } else if (ctxt->input->free != deallocblankswrapper) {
2639 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2640 if (xmlPushInput(ctxt, input) < 0)
2641 return;
2642 } else {
2643 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2644 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2645 xmlChar start[4];
2646 xmlCharEncoding enc;
2647
2648 /*
2649 * Note: external parameter entities will not be loaded, it
2650 * is not required for a non-validating parser, unless the
2651 * option of validating, or substituting entities were
2652 * given. Doing so is far more secure as the parser will
2653 * only process data coming from the document entity by
2654 * default.
2655 */
2656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2657 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2658 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2659 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2660 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2661 (ctxt->replaceEntities == 0) &&
2662 (ctxt->validate == 0))
2663 return;
2664
2665 /*
2666 * handle the extra spaces added before and after
2667 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2668 * this is done independently.
2669 */
2670 input = xmlNewEntityInputStream(ctxt, entity);
2671 if (xmlPushInput(ctxt, input) < 0)
2672 return;
2673
2674 /*
2675 * Get the 4 first bytes and decode the charset
2676 * if enc != XML_CHAR_ENCODING_NONE
2677 * plug some encoding conversion routines.
2678 * Note that, since we may have some non-UTF8
2679 * encoding (like UTF16, bug 135229), the 'length'
2680 * is not known, but we can calculate based upon
2681 * the amount of data in the buffer.
2682 */
2683 GROW
2684 if (ctxt->instate == XML_PARSER_EOF)
2685 return;
2686 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2687 start[0] = RAW;
2688 start[1] = NXT(1);
2689 start[2] = NXT(2);
2690 start[3] = NXT(3);
2691 enc = xmlDetectCharEncoding(start, 4);
2692 if (enc != XML_CHAR_ENCODING_NONE) {
2693 xmlSwitchEncoding(ctxt, enc);
2694 }
2695 }
2696
2697 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2698 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2699 (IS_BLANK_CH(NXT(5)))) {
2700 xmlParseTextDecl(ctxt);
2701 }
2702 } else {
2703 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2704 "PEReference: %s is not a parameter entity\n",
2705 name);
2706 }
2707 }
2708 } else {
2709 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2710 }
2711 }
2712 }
2713
/*
 * growBuffer:
 * Enlarge the heap buffer @buffer to twice its current size plus @n bytes.
 *
 * The expansion site must provide:
 *   - a size_t variable named <buffer>_size, updated on success;
 *   - a label mem_error, jumped to when the new size wraps around or the
 *     reallocation fails (in which case @buffer and its size are left
 *     untouched).
 */
#define growBuffer(buffer, n) {                                         \
    size_t new_size = buffer##_size * 2 + n;                            \
    xmlChar *tmp;                                                       \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer##_size = new_size;                                           \
    buffer = tmp;                                                       \
}
2728
2729 /**
2730 * xmlStringLenDecodeEntities:
2731 * @ctxt: the parser context
2732 * @str: the input string
2733 * @len: the string length
2734 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2735 * @end: an end marker xmlChar, 0 if none
2736 * @end2: an end marker xmlChar, 0 if none
2737 * @end3: an end marker xmlChar, 0 if none
2738 *
2739 * Takes a entity string content and process to do the adequate substitutions.
2740 *
2741 * [67] Reference ::= EntityRef | CharRef
2742 *
2743 * [69] PEReference ::= '%' Name ';'
2744 *
2745 * Returns A newly allocated string with the substitution done. The caller
2746 * must deallocate it !
2747 */
2748 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2749 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2750 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2751 xmlChar *buffer = NULL;
2752 size_t buffer_size = 0;
2753 size_t nbchars = 0;
2754
2755 xmlChar *current = NULL;
2756 xmlChar *rep = NULL;
2757 const xmlChar *last;
2758 xmlEntityPtr ent;
2759 int c,l;
2760
2761 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2762 return(NULL);
2763 last = str + len;
2764
2765 if (((ctxt->depth > 40) &&
2766 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2767 (ctxt->depth > 1024)) {
2768 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2769 return(NULL);
2770 }
2771
2772 /*
2773 * allocate a translation buffer.
2774 */
2775 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2776 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2777 if (buffer == NULL) goto mem_error;
2778
2779 /*
2780 * OK loop until we reach one of the ending char or a size limit.
2781 * we are operating on already parsed values.
2782 */
2783 if (str < last)
2784 c = CUR_SCHAR(str, l);
2785 else
2786 c = 0;
2787 while ((c != 0) && (c != end) && /* non input consuming loop */
2788 (c != end2) && (c != end3)) {
2789
2790 if (c == 0) break;
2791 if ((c == '&') && (str[1] == '#')) {
2792 int val = xmlParseStringCharRef(ctxt, &str);
2793 if (val != 0) {
2794 COPY_BUF(0,buffer,nbchars,val);
2795 }
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798 }
2799 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2800 if (xmlParserDebugEntities)
2801 xmlGenericError(xmlGenericErrorContext,
2802 "String decoding Entity Reference: %.30s\n",
2803 str);
2804 ent = xmlParseStringEntityRef(ctxt, &str);
2805 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2806 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2807 goto int_error;
2808 xmlParserEntityCheck(ctxt, 0, ent, 0);
2809 if (ent != NULL)
2810 ctxt->nbentities += ent->checked / 2;
2811 if ((ent != NULL) &&
2812 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2813 if (ent->content != NULL) {
2814 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2815 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2817 }
2818 } else {
2819 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2820 "predefined entity has no content\n");
2821 }
2822 } else if ((ent != NULL) && (ent->content != NULL)) {
2823 ctxt->depth++;
2824 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2825 0, 0, 0);
2826 ctxt->depth--;
2827
2828 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2829 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2830 goto int_error;
2831
2832 if (rep != NULL) {
2833 current = rep;
2834 while (*current != 0) { /* non input consuming loop */
2835 buffer[nbchars++] = *current++;
2836 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2837 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2838 goto int_error;
2839 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2840 }
2841 }
2842 xmlFree(rep);
2843 rep = NULL;
2844 }
2845 } else if (ent != NULL) {
2846 int i = xmlStrlen(ent->name);
2847 const xmlChar *cur = ent->name;
2848
2849 buffer[nbchars++] = '&';
2850 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2851 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2852 }
2853 for (;i > 0;i--)
2854 buffer[nbchars++] = *cur++;
2855 buffer[nbchars++] = ';';
2856 }
2857 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2858 if (xmlParserDebugEntities)
2859 xmlGenericError(xmlGenericErrorContext,
2860 "String decoding PE Reference: %.30s\n", str);
2861 ent = xmlParseStringPEReference(ctxt, &str);
2862 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2863 goto int_error;
2864 xmlParserEntityCheck(ctxt, 0, ent, 0);
2865 if (ent != NULL)
2866 ctxt->nbentities += ent->checked / 2;
2867 if (ent != NULL) {
2868 if (ent->content == NULL) {
2869 /*
2870 * Note: external parsed entities will not be loaded,
2871 * it is not required for a non-validating parser to
2872 * complete external PEreferences coming from the
2873 * internal subset
2874 */
2875 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2876 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2877 (ctxt->validate != 0)) {
2878 xmlLoadEntityContent(ctxt, ent);
2879 } else {
2880 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2881 "not validating will not read content for PE entity %s\n",
2882 ent->name, NULL);
2883 }
2884 }
2885 ctxt->depth++;
2886 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2887 0, 0, 0);
2888 ctxt->depth--;
2889 if (rep != NULL) {
2890 current = rep;
2891 while (*current != 0) { /* non input consuming loop */
2892 buffer[nbchars++] = *current++;
2893 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2894 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2895 goto int_error;
2896 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2897 }
2898 }
2899 xmlFree(rep);
2900 rep = NULL;
2901 }
2902 }
2903 } else {
2904 COPY_BUF(l,buffer,nbchars,c);
2905 str += l;
2906 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2907 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2908 }
2909 }
2910 if (str < last)
2911 c = CUR_SCHAR(str, l);
2912 else
2913 c = 0;
2914 }
2915 buffer[nbchars] = 0;
2916 return(buffer);
2917
2918 mem_error:
2919 xmlErrMemory(ctxt, NULL);
2920 int_error:
2921 if (rep != NULL)
2922 xmlFree(rep);
2923 if (buffer != NULL)
2924 xmlFree(buffer);
2925 return(NULL);
2926 }
2927
2928 /**
2929 * xmlStringDecodeEntities:
2930 * @ctxt: the parser context
2931 * @str: the input string
2932 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2933 * @end: an end marker xmlChar, 0 if none
2934 * @end2: an end marker xmlChar, 0 if none
2935 * @end3: an end marker xmlChar, 0 if none
2936 *
2937 * Takes a entity string content and process to do the adequate substitutions.
2938 *
2939 * [67] Reference ::= EntityRef | CharRef
2940 *
2941 * [69] PEReference ::= '%' Name ';'
2942 *
2943 * Returns A newly allocated string with the substitution done. The caller
2944 * must deallocate it !
2945 */
2946 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2947 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2948 xmlChar end, xmlChar end2, xmlChar end3) {
2949 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2950 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2951 end, end2, end3));
2952 }
2953
2954 /************************************************************************
2955 * *
2956 * Commodity functions, cleanup needed ? *
2957 * *
2958 ************************************************************************/
2959
2960 /**
2961 * areBlanks:
2962 * @ctxt: an XML parser context
2963 * @str: a xmlChar *
2964 * @len: the size of @str
2965 * @blank_chars: we know the chars are blanks
2966 *
2967 * Is this a sequence of blank chars that one can ignore ?
2968 *
2969 * Returns 1 if ignorable 0 otherwise.
2970 */
2971
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2972 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2973 int blank_chars) {
2974 int i, ret;
2975 xmlNodePtr lastChild;
2976
2977 /*
2978 * Don't spend time trying to differentiate them, the same callback is
2979 * used !
2980 */
2981 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2982 return(0);
2983
2984 /*
2985 * Check for xml:space value.
2986 */
2987 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2988 (*(ctxt->space) == -2))
2989 return(0);
2990
2991 /*
2992 * Check that the string is made of blanks
2993 */
2994 if (blank_chars == 0) {
2995 for (i = 0;i < len;i++)
2996 if (!(IS_BLANK_CH(str[i]))) return(0);
2997 }
2998
2999 /*
3000 * Look if the element is mixed content in the DTD if available
3001 */
3002 if (ctxt->node == NULL) return(0);
3003 if (ctxt->myDoc != NULL) {
3004 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3005 if (ret == 0) return(1);
3006 if (ret == 1) return(0);
3007 }
3008
3009 /*
3010 * Otherwise, heuristic :-\
3011 */
3012 if ((RAW != '<') && (RAW != 0xD)) return(0);
3013 if ((ctxt->node->children == NULL) &&
3014 (RAW == '<') && (NXT(1) == '/')) return(0);
3015
3016 lastChild = xmlGetLastChild(ctxt->node);
3017 if (lastChild == NULL) {
3018 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3019 (ctxt->node->content != NULL)) return(0);
3020 } else if (xmlNodeIsText(lastChild))
3021 return(0);
3022 else if ((ctxt->node->children != NULL) &&
3023 (xmlNodeIsText(ctxt->node->children)))
3024 return(0);
3025 return(1);
3026 }
3027
3028 /************************************************************************
3029 * *
3030 * Extra stuff for namespace support *
3031 * Relates to http://www.w3.org/TR/WD-xml-names *
3032 * *
3033 ************************************************************************/
3034
3035 /**
3036 * xmlSplitQName:
3037 * @ctxt: an XML parser context
3038 * @name: an XML parser context
3039 * @prefix: a xmlChar **
3040 *
3041 * parse an UTF8 encoded XML qualified name string
3042 *
3043 * [NS 5] QName ::= (Prefix ':')? LocalPart
3044 *
3045 * [NS 6] Prefix ::= NCName
3046 *
3047 * [NS 7] LocalPart ::= NCName
3048 *
3049 * Returns the local part, and prefix is updated
3050 * to get the Prefix if any.
3051 */
3052
3053 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)3054 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3055 xmlChar buf[XML_MAX_NAMELEN + 5];
3056 xmlChar *buffer = NULL;
3057 int len = 0;
3058 int max = XML_MAX_NAMELEN;
3059 xmlChar *ret = NULL;
3060 const xmlChar *cur = name;
3061 int c;
3062
3063 if (prefix == NULL) return(NULL);
3064 *prefix = NULL;
3065
3066 if (cur == NULL) return(NULL);
3067
3068 #ifndef XML_XML_NAMESPACE
3069 /* xml: prefix is not really a namespace */
3070 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3071 (cur[2] == 'l') && (cur[3] == ':'))
3072 return(xmlStrdup(name));
3073 #endif
3074
3075 /* nasty but well=formed */
3076 if (cur[0] == ':')
3077 return(xmlStrdup(name));
3078
3079 c = *cur++;
3080 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3081 buf[len++] = c;
3082 c = *cur++;
3083 }
3084 if (len >= max) {
3085 /*
3086 * Okay someone managed to make a huge name, so he's ready to pay
3087 * for the processing speed.
3088 */
3089 max = len * 2;
3090
3091 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3092 if (buffer == NULL) {
3093 xmlErrMemory(ctxt, NULL);
3094 return(NULL);
3095 }
3096 memcpy(buffer, buf, len);
3097 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3098 if (len + 10 > max) {
3099 xmlChar *tmp;
3100
3101 max *= 2;
3102 tmp = (xmlChar *) xmlRealloc(buffer,
3103 max * sizeof(xmlChar));
3104 if (tmp == NULL) {
3105 xmlFree(buffer);
3106 xmlErrMemory(ctxt, NULL);
3107 return(NULL);
3108 }
3109 buffer = tmp;
3110 }
3111 buffer[len++] = c;
3112 c = *cur++;
3113 }
3114 buffer[len] = 0;
3115 }
3116
3117 if ((c == ':') && (*cur == 0)) {
3118 if (buffer != NULL)
3119 xmlFree(buffer);
3120 *prefix = NULL;
3121 return(xmlStrdup(name));
3122 }
3123
3124 if (buffer == NULL)
3125 ret = xmlStrndup(buf, len);
3126 else {
3127 ret = buffer;
3128 buffer = NULL;
3129 max = XML_MAX_NAMELEN;
3130 }
3131
3132
3133 if (c == ':') {
3134 c = *cur;
3135 *prefix = ret;
3136 if (c == 0) {
3137 return(xmlStrndup(BAD_CAST "", 0));
3138 }
3139 len = 0;
3140
3141 /*
3142 * Check that the first character is proper to start
3143 * a new name
3144 */
3145 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3146 ((c >= 0x41) && (c <= 0x5A)) ||
3147 (c == '_') || (c == ':'))) {
3148 int l;
3149 int first = CUR_SCHAR(cur, l);
3150
3151 if (!IS_LETTER(first) && (first != '_')) {
3152 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3153 "Name %s is not XML Namespace compliant\n",
3154 name);
3155 }
3156 }
3157 cur++;
3158
3159 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3160 buf[len++] = c;
3161 c = *cur++;
3162 }
3163 if (len >= max) {
3164 /*
3165 * Okay someone managed to make a huge name, so he's ready to pay
3166 * for the processing speed.
3167 */
3168 max = len * 2;
3169
3170 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3171 if (buffer == NULL) {
3172 xmlErrMemory(ctxt, NULL);
3173 return(NULL);
3174 }
3175 memcpy(buffer, buf, len);
3176 while (c != 0) { /* tested bigname2.xml */
3177 if (len + 10 > max) {
3178 xmlChar *tmp;
3179
3180 max *= 2;
3181 tmp = (xmlChar *) xmlRealloc(buffer,
3182 max * sizeof(xmlChar));
3183 if (tmp == NULL) {
3184 xmlErrMemory(ctxt, NULL);
3185 xmlFree(buffer);
3186 return(NULL);
3187 }
3188 buffer = tmp;
3189 }
3190 buffer[len++] = c;
3191 c = *cur++;
3192 }
3193 buffer[len] = 0;
3194 }
3195
3196 if (buffer == NULL)
3197 ret = xmlStrndup(buf, len);
3198 else {
3199 ret = buffer;
3200 }
3201 }
3202
3203 return(ret);
3204 }
3205
3206 /************************************************************************
3207 * *
3208 * The parser itself *
3209 * Relates to http://www.w3.org/TR/REC-xml *
3210 * *
3211 ************************************************************************/
3212
3213 /************************************************************************
3214 * *
3215 * Routines to parse Name, NCName and NmToken *
3216 * *
3217 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines. They only exist in DEBUG
 * builds; each one is incremented on entry of the routine it is named after.
 */
/* plain entry points */
static unsigned long nbParseName = 0UL;
static unsigned long nbParseNCName = 0UL;
static unsigned long nbParseNmToken = 0UL;
static unsigned long nbParseStringName = 0UL;
/* fallback ("complex") variants */
static unsigned long nbParseNameComplex = 0UL;
static unsigned long nbParseNCNameComplex = 0UL;
#endif
3226
3227 /*
3228 * The two following functions are related to the change of accepted
3229 * characters for Name and NmToken in the Revision 5 of XML-1.0
3230 * They correspond to the modified production [4] and the new production [4a]
3231 * changes in that revision. Also note that the macros used for the
3232 * productions Letter, Digit, CombiningChar and Extender are not needed
3233 * anymore.
3234 * We still keep compatibility to pre-revision5 parsing semantic if the
3235 * new XML_PARSE_OLD10 option is given to the parser.
3236 */
3237 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3238 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240 /*
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3243 */
3244 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3245 (((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))
3260 return(1);
3261 } else {
3262 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3263 return(1);
3264 }
3265 return(0);
3266 }
3267
3268 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3269 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3270 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3271 /*
3272 * Use the new checks of production [4] [4a] amd [5] of the
3273 * Update 5 of XML-1.0
3274 */
3275 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3276 (((c >= 'a') && (c <= 'z')) ||
3277 ((c >= 'A') && (c <= 'Z')) ||
3278 ((c >= '0') && (c <= '9')) || /* !start */
3279 (c == '_') || (c == ':') ||
3280 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3281 ((c >= 0xC0) && (c <= 0xD6)) ||
3282 ((c >= 0xD8) && (c <= 0xF6)) ||
3283 ((c >= 0xF8) && (c <= 0x2FF)) ||
3284 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3285 ((c >= 0x370) && (c <= 0x37D)) ||
3286 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3287 ((c >= 0x200C) && (c <= 0x200D)) ||
3288 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3289 ((c >= 0x2070) && (c <= 0x218F)) ||
3290 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3291 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3292 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3293 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3294 ((c >= 0x10000) && (c <= 0xEFFFF))))
3295 return(1);
3296 } else {
3297 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298 (c == '.') || (c == '-') ||
3299 (c == '_') || (c == ':') ||
3300 (IS_COMBINING(c)) ||
3301 (IS_EXTENDER(c)))
3302 return(1);
3303 }
3304 return(0);
3305 }
3306
3307 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3308 int *len, int *alloc, int normalize);
3309
3310 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3311 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3312 int len = 0, l;
3313 int c;
3314 int count = 0;
3315
3316 #ifdef DEBUG
3317 nbParseNameComplex++;
3318 #endif
3319
3320 /*
3321 * Handler for more complex cases
3322 */
3323 GROW;
3324 if (ctxt->instate == XML_PARSER_EOF)
3325 return(NULL);
3326 c = CUR_CHAR(l);
3327 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3328 /*
3329 * Use the new checks of production [4] [4a] amd [5] of the
3330 * Update 5 of XML-1.0
3331 */
3332 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3333 (!(((c >= 'a') && (c <= 'z')) ||
3334 ((c >= 'A') && (c <= 'Z')) ||
3335 (c == '_') || (c == ':') ||
3336 ((c >= 0xC0) && (c <= 0xD6)) ||
3337 ((c >= 0xD8) && (c <= 0xF6)) ||
3338 ((c >= 0xF8) && (c <= 0x2FF)) ||
3339 ((c >= 0x370) && (c <= 0x37D)) ||
3340 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341 ((c >= 0x200C) && (c <= 0x200D)) ||
3342 ((c >= 0x2070) && (c <= 0x218F)) ||
3343 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3344 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3345 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3346 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3347 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3348 return(NULL);
3349 }
3350 len += l;
3351 NEXTL(l);
3352 c = CUR_CHAR(l);
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3354 (((c >= 'a') && (c <= 'z')) ||
3355 ((c >= 'A') && (c <= 'Z')) ||
3356 ((c >= '0') && (c <= '9')) || /* !start */
3357 (c == '_') || (c == ':') ||
3358 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3359 ((c >= 0xC0) && (c <= 0xD6)) ||
3360 ((c >= 0xD8) && (c <= 0xF6)) ||
3361 ((c >= 0xF8) && (c <= 0x2FF)) ||
3362 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3363 ((c >= 0x370) && (c <= 0x37D)) ||
3364 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3365 ((c >= 0x200C) && (c <= 0x200D)) ||
3366 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3367 ((c >= 0x2070) && (c <= 0x218F)) ||
3368 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3369 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3370 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3371 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3372 ((c >= 0x10000) && (c <= 0xEFFFF))
3373 )) {
3374 if (count++ > XML_PARSER_CHUNK_SIZE) {
3375 count = 0;
3376 GROW;
3377 if (ctxt->instate == XML_PARSER_EOF)
3378 return(NULL);
3379 }
3380 len += l;
3381 NEXTL(l);
3382 c = CUR_CHAR(l);
3383 }
3384 } else {
3385 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3386 (!IS_LETTER(c) && (c != '_') &&
3387 (c != ':'))) {
3388 return(NULL);
3389 }
3390 len += l;
3391 NEXTL(l);
3392 c = CUR_CHAR(l);
3393
3394 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3395 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3396 (c == '.') || (c == '-') ||
3397 (c == '_') || (c == ':') ||
3398 (IS_COMBINING(c)) ||
3399 (IS_EXTENDER(c)))) {
3400 if (count++ > XML_PARSER_CHUNK_SIZE) {
3401 count = 0;
3402 GROW;
3403 if (ctxt->instate == XML_PARSER_EOF)
3404 return(NULL);
3405 }
3406 len += l;
3407 NEXTL(l);
3408 c = CUR_CHAR(l);
3409 if (c == 0) {
3410 count = 0;
3411 GROW;
3412 if (ctxt->instate == XML_PARSER_EOF)
3413 return(NULL);
3414 c = CUR_CHAR(l);
3415 }
3416 }
3417 }
3418 if ((len > XML_MAX_NAME_LENGTH) &&
3419 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3420 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3421 return(NULL);
3422 }
3423 if (ctxt->input->cur > ctxt->input->base && (*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) {
3424 if (ctxt->input->base > ctxt->input->cur - (len + 1)) {
3425 return(NULL);
3426 }
3427 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3428 }
3429 if (ctxt->input->base > ctxt->input->cur - len) {
3430 return(NULL);
3431 }
3432 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3433 }
3434
3435 /**
3436 * xmlParseName:
3437 * @ctxt: an XML parser context
3438 *
3439 * parse an XML name.
3440 *
3441 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3442 * CombiningChar | Extender
3443 *
3444 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3445 *
3446 * [6] Names ::= Name (#x20 Name)*
3447 *
3448 * Returns the Name parsed or NULL
3449 */
3450
3451 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3452 xmlParseName(xmlParserCtxtPtr ctxt) {
3453 const xmlChar *in;
3454 const xmlChar *ret;
3455 int count = 0;
3456
3457 GROW;
3458
3459 #ifdef DEBUG
3460 nbParseName++;
3461 #endif
3462
3463 /*
3464 * Accelerator for simple ASCII names
3465 */
3466 in = ctxt->input->cur;
3467 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3468 ((*in >= 0x41) && (*in <= 0x5A)) ||
3469 (*in == '_') || (*in == ':')) {
3470 in++;
3471 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3472 ((*in >= 0x41) && (*in <= 0x5A)) ||
3473 ((*in >= 0x30) && (*in <= 0x39)) ||
3474 (*in == '_') || (*in == '-') ||
3475 (*in == ':') || (*in == '.'))
3476 in++;
3477 if ((*in > 0) && (*in < 0x80)) {
3478 count = in - ctxt->input->cur;
3479 if ((count > XML_MAX_NAME_LENGTH) &&
3480 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3481 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3482 return(NULL);
3483 }
3484 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3485 ctxt->input->cur = in;
3486 ctxt->nbChars += count;
3487 ctxt->input->col += count;
3488 if (ret == NULL)
3489 xmlErrMemory(ctxt, NULL);
3490 return(ret);
3491 }
3492 }
3493 /* accelerator for special cases */
3494 return(xmlParseNameComplex(ctxt));
3495 }
3496
3497 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3498 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3499 int len = 0, l;
3500 int c;
3501 int count = 0;
3502 size_t startPosition = 0;
3503
3504 #ifdef DEBUG
3505 nbParseNCNameComplex++;
3506 #endif
3507
3508 /*
3509 * Handler for more complex cases
3510 */
3511 GROW;
3512 startPosition = CUR_PTR - BASE_PTR;
3513 c = CUR_CHAR(l);
3514 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3515 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3516 return(NULL);
3517 }
3518
3519 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3520 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3521 if (count++ > XML_PARSER_CHUNK_SIZE) {
3522 if ((len > XML_MAX_NAME_LENGTH) &&
3523 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3524 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3525 return(NULL);
3526 }
3527 count = 0;
3528 GROW;
3529 if (ctxt->instate == XML_PARSER_EOF)
3530 return(NULL);
3531 }
3532 len += l;
3533 NEXTL(l);
3534 c = CUR_CHAR(l);
3535 if (c == 0) {
3536 count = 0;
3537 /*
3538 * when shrinking to extend the buffer we really need to preserve
3539 * the part of the name we already parsed. Hence rolling back
3540 * by current lenght.
3541 */
3542 ctxt->input->cur -= l;
3543 GROW;
3544 ctxt->input->cur += l;
3545 if (ctxt->instate == XML_PARSER_EOF)
3546 return(NULL);
3547 c = CUR_CHAR(l);
3548 }
3549 }
3550 if ((len > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 return(NULL);
3554 }
3555 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3556 }
3557
3558 /**
3559 * xmlParseNCName:
3560 * @ctxt: an XML parser context
3561 * @len: length of the string parsed
3562 *
3563 * parse an XML name.
3564 *
3565 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3566 * CombiningChar | Extender
3567 *
3568 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3569 *
3570 * Returns the Name parsed or NULL
3571 */
3572
3573 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3574 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3575 const xmlChar *in, *e;
3576 const xmlChar *ret;
3577 int count = 0;
3578
3579 #ifdef DEBUG
3580 nbParseNCName++;
3581 #endif
3582
3583 /*
3584 * Accelerator for simple ASCII names
3585 */
3586 in = ctxt->input->cur;
3587 e = ctxt->input->end;
3588 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3589 ((*in >= 0x41) && (*in <= 0x5A)) ||
3590 (*in == '_')) && (in < e)) {
3591 in++;
3592 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3593 ((*in >= 0x41) && (*in <= 0x5A)) ||
3594 ((*in >= 0x30) && (*in <= 0x39)) ||
3595 (*in == '_') || (*in == '-') ||
3596 (*in == '.')) && (in < e))
3597 in++;
3598 if (in >= e)
3599 goto complex;
3600 if ((*in > 0) && (*in < 0x80)) {
3601 count = in - ctxt->input->cur;
3602 if ((count > XML_MAX_NAME_LENGTH) &&
3603 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3604 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3605 return(NULL);
3606 }
3607 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3608 ctxt->input->cur = in;
3609 ctxt->nbChars += count;
3610 ctxt->input->col += count;
3611 if (ret == NULL) {
3612 xmlErrMemory(ctxt, NULL);
3613 }
3614 return(ret);
3615 }
3616 }
3617 complex:
3618 return(xmlParseNCNameComplex(ctxt));
3619 }
3620
3621 /**
3622 * xmlParseNameAndCompare:
3623 * @ctxt: an XML parser context
3624 *
3625 * parse an XML name and compares for match
3626 * (specialized for endtag parsing)
3627 *
3628 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3629 * and the name for mismatch
3630 */
3631
3632 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3633 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3634 register const xmlChar *cmp = other;
3635 register const xmlChar *in;
3636 const xmlChar *ret;
3637
3638 GROW;
3639 if (ctxt->instate == XML_PARSER_EOF)
3640 return(NULL);
3641
3642 in = ctxt->input->cur;
3643 while (*in != 0 && *in == *cmp) {
3644 ++in;
3645 ++cmp;
3646 ctxt->input->col++;
3647 }
3648 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3649 /* success */
3650 ctxt->input->cur = in;
3651 return (const xmlChar*) 1;
3652 }
3653 /* failure (or end of input buffer), check with full function */
3654 ret = xmlParseName (ctxt);
3655 /* strings coming from the dictionary direct compare possible */
3656 if (ret == other) {
3657 return (const xmlChar*) 1;
3658 }
3659 return ret;
3660 }
3661
3662 /**
3663 * xmlParseStringName:
3664 * @ctxt: an XML parser context
3665 * @str: a pointer to the string pointer (IN/OUT)
3666 *
3667 * parse an XML name.
3668 *
3669 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3670 * CombiningChar | Extender
3671 *
3672 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3673 *
3674 * [6] Names ::= Name (#x20 Name)*
3675 *
3676 * Returns the Name parsed or NULL. The @str pointer
3677 * is updated to the current location in the string.
3678 */
3679
3680 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3681 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3682 xmlChar buf[XML_MAX_NAMELEN + 5];
3683 const xmlChar *cur = *str;
3684 int len = 0, l;
3685 int c;
3686
3687 #ifdef DEBUG
3688 nbParseStringName++;
3689 #endif
3690
3691 c = CUR_SCHAR(cur, l);
3692 if (!xmlIsNameStartChar(ctxt, c)) {
3693 return(NULL);
3694 }
3695
3696 COPY_BUF(l,buf,len,c);
3697 cur += l;
3698 c = CUR_SCHAR(cur, l);
3699 while (xmlIsNameChar(ctxt, c)) {
3700 COPY_BUF(l,buf,len,c);
3701 cur += l;
3702 c = CUR_SCHAR(cur, l);
3703 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3704 /*
3705 * Okay someone managed to make a huge name, so he's ready to pay
3706 * for the processing speed.
3707 */
3708 xmlChar *buffer;
3709 int max = len * 2;
3710
3711 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3712 if (buffer == NULL) {
3713 xmlErrMemory(ctxt, NULL);
3714 return(NULL);
3715 }
3716 memcpy(buffer, buf, len);
3717 while (xmlIsNameChar(ctxt, c)) {
3718 if (len + 10 > max) {
3719 xmlChar *tmp;
3720
3721 if ((len > XML_MAX_NAME_LENGTH) &&
3722 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3723 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3724 xmlFree(buffer);
3725 return(NULL);
3726 }
3727 max *= 2;
3728 tmp = (xmlChar *) xmlRealloc(buffer,
3729 max * sizeof(xmlChar));
3730 if (tmp == NULL) {
3731 xmlErrMemory(ctxt, NULL);
3732 xmlFree(buffer);
3733 return(NULL);
3734 }
3735 buffer = tmp;
3736 }
3737 COPY_BUF(l,buffer,len,c);
3738 cur += l;
3739 c = CUR_SCHAR(cur, l);
3740 }
3741 buffer[len] = 0;
3742 *str = cur;
3743 return(buffer);
3744 }
3745 }
3746 if ((len > XML_MAX_NAME_LENGTH) &&
3747 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3748 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3749 return(NULL);
3750 }
3751 *str = cur;
3752 return(xmlStrndup(buf, len));
3753 }
3754
3755 /**
3756 * xmlParseNmtoken:
3757 * @ctxt: an XML parser context
3758 *
3759 * parse an XML Nmtoken.
3760 *
3761 * [7] Nmtoken ::= (NameChar)+
3762 *
3763 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3764 *
3765 * Returns the Nmtoken parsed or NULL
3766 */
3767
3768 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3769 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3770 xmlChar buf[XML_MAX_NAMELEN + 5];
3771 int len = 0, l;
3772 int c;
3773 int count = 0;
3774
3775 #ifdef DEBUG
3776 nbParseNmToken++;
3777 #endif
3778
3779 GROW;
3780 if (ctxt->instate == XML_PARSER_EOF)
3781 return(NULL);
3782 c = CUR_CHAR(l);
3783
3784 while (xmlIsNameChar(ctxt, c)) {
3785 if (count++ > XML_PARSER_CHUNK_SIZE) {
3786 count = 0;
3787 GROW;
3788 }
3789 COPY_BUF(l,buf,len,c);
3790 NEXTL(l);
3791 c = CUR_CHAR(l);
3792 if (c == 0) {
3793 count = 0;
3794 GROW;
3795 if (ctxt->instate == XML_PARSER_EOF)
3796 return(NULL);
3797 c = CUR_CHAR(l);
3798 }
3799 if (len >= XML_MAX_NAMELEN) {
3800 /*
3801 * Okay someone managed to make a huge token, so he's ready to pay
3802 * for the processing speed.
3803 */
3804 xmlChar *buffer;
3805 int max = len * 2;
3806
3807 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3808 if (buffer == NULL) {
3809 xmlErrMemory(ctxt, NULL);
3810 return(NULL);
3811 }
3812 memcpy(buffer, buf, len);
3813 while (xmlIsNameChar(ctxt, c)) {
3814 if (count++ > XML_PARSER_CHUNK_SIZE) {
3815 count = 0;
3816 GROW;
3817 if (ctxt->instate == XML_PARSER_EOF) {
3818 xmlFree(buffer);
3819 return(NULL);
3820 }
3821 }
3822 if (len + 10 > max) {
3823 xmlChar *tmp;
3824
3825 if ((max > XML_MAX_NAME_LENGTH) &&
3826 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3827 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3828 xmlFree(buffer);
3829 return(NULL);
3830 }
3831 max *= 2;
3832 tmp = (xmlChar *) xmlRealloc(buffer,
3833 max * sizeof(xmlChar));
3834 if (tmp == NULL) {
3835 xmlErrMemory(ctxt, NULL);
3836 xmlFree(buffer);
3837 return(NULL);
3838 }
3839 buffer = tmp;
3840 }
3841 COPY_BUF(l,buffer,len,c);
3842 NEXTL(l);
3843 c = CUR_CHAR(l);
3844 }
3845 buffer[len] = 0;
3846 return(buffer);
3847 }
3848 }
3849 if (len == 0)
3850 return(NULL);
3851 if ((len > XML_MAX_NAME_LENGTH) &&
3852 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3853 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3854 return(NULL);
3855 }
3856 return(xmlStrndup(buf, len));
3857 }
3858
3859 /**
3860 * xmlParseEntityValue:
3861 * @ctxt: an XML parser context
3862 * @orig: if non-NULL store a copy of the original entity value
3863 *
3864 * parse a value for ENTITY declarations
3865 *
3866 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3867 * "'" ([^%&'] | PEReference | Reference)* "'"
3868 *
3869 * Returns the EntityValue parsed with reference substituted or NULL
3870 */
3871
3872 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3873 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3874 xmlChar *buf = NULL;
3875 int len = 0;
3876 int size = XML_PARSER_BUFFER_SIZE;
3877 int c, l;
3878 xmlChar stop;
3879 xmlChar *ret = NULL;
3880 const xmlChar *cur = NULL;
3881 xmlParserInputPtr input;
3882
3883 if (RAW == '"') stop = '"';
3884 else if (RAW == '\'') stop = '\'';
3885 else {
3886 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3887 return(NULL);
3888 }
3889 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3890 if (buf == NULL) {
3891 xmlErrMemory(ctxt, NULL);
3892 return(NULL);
3893 }
3894
3895 /*
3896 * The content of the entity definition is copied in a buffer.
3897 */
3898
3899 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3900 input = ctxt->input;
3901 GROW;
3902 if (ctxt->instate == XML_PARSER_EOF) {
3903 xmlFree(buf);
3904 return(NULL);
3905 }
3906 NEXT;
3907 c = CUR_CHAR(l);
3908 /*
3909 * NOTE: 4.4.5 Included in Literal
3910 * When a parameter entity reference appears in a literal entity
3911 * value, ... a single or double quote character in the replacement
3912 * text is always treated as a normal data character and will not
3913 * terminate the literal.
3914 * In practice it means we stop the loop only when back at parsing
3915 * the initial entity and the quote is found
3916 */
3917 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3918 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3919 if (len + 5 >= size) {
3920 xmlChar *tmp;
3921
3922 size *= 2;
3923 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3924 if (tmp == NULL) {
3925 xmlErrMemory(ctxt, NULL);
3926 xmlFree(buf);
3927 return(NULL);
3928 }
3929 buf = tmp;
3930 }
3931 COPY_BUF(l,buf,len,c);
3932 NEXTL(l);
3933 /*
3934 * Pop-up of finished entities.
3935 */
3936 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3937 xmlPopInput(ctxt);
3938
3939 GROW;
3940 c = CUR_CHAR(l);
3941 if (c == 0) {
3942 GROW;
3943 c = CUR_CHAR(l);
3944 }
3945 }
3946 buf[len] = 0;
3947 if (ctxt->instate == XML_PARSER_EOF) {
3948 xmlFree(buf);
3949 return(NULL);
3950 }
3951
3952 /*
3953 * Raise problem w.r.t. '&' and '%' being used in non-entities
3954 * reference constructs. Note Charref will be handled in
3955 * xmlStringDecodeEntities()
3956 */
3957 cur = buf;
3958 while (*cur != 0) { /* non input consuming */
3959 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3960 xmlChar *name;
3961 xmlChar tmp = *cur;
3962
3963 cur++;
3964 name = xmlParseStringName(ctxt, &cur);
3965 if ((name == NULL) || (*cur != ';')) {
3966 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3967 "EntityValue: '%c' forbidden except for entities references\n",
3968 tmp);
3969 }
3970 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3971 (ctxt->inputNr == 1)) {
3972 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3973 }
3974 if (name != NULL)
3975 xmlFree(name);
3976 if (*cur == 0)
3977 break;
3978 }
3979 cur++;
3980 }
3981
3982 /*
3983 * Then PEReference entities are substituted.
3984 */
3985 if (c != stop) {
3986 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3987 xmlFree(buf);
3988 } else {
3989 NEXT;
3990 /*
3991 * NOTE: 4.4.7 Bypassed
3992 * When a general entity reference appears in the EntityValue in
3993 * an entity declaration, it is bypassed and left as is.
3994 * so XML_SUBSTITUTE_REF is not set here.
3995 */
3996 ++ctxt->depth;
3997 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3998 0, 0, 0);
3999 --ctxt->depth;
4000 if (orig != NULL)
4001 *orig = buf;
4002 else
4003 xmlFree(buf);
4004 }
4005
4006 return(ret);
4007 }
4008
4009 /**
4010 * xmlParseAttValueComplex:
4011 * @ctxt: an XML parser context
4012 * @len: the resulting attribute len
4013 * @normalize: wether to apply the inner normalization
4014 *
4015 * parse a value for an attribute, this is the fallback function
4016 * of xmlParseAttValue() when the attribute parsing requires handling
4017 * of non-ASCII characters, or normalization compaction.
4018 *
4019 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4020 */
4021 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)4022 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4023 xmlChar limit = 0;
4024 xmlChar *buf = NULL;
4025 xmlChar *rep = NULL;
4026 size_t len = 0;
4027 size_t buf_size = 0;
4028 int c, l, in_space = 0;
4029 xmlChar *current = NULL;
4030 xmlEntityPtr ent;
4031
4032 if (NXT(0) == '"') {
4033 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4034 limit = '"';
4035 NEXT;
4036 } else if (NXT(0) == '\'') {
4037 limit = '\'';
4038 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4039 NEXT;
4040 } else {
4041 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4042 return(NULL);
4043 }
4044
4045 /*
4046 * allocate a translation buffer.
4047 */
4048 buf_size = XML_PARSER_BUFFER_SIZE;
4049 buf = (xmlChar *) xmlMallocAtomic(buf_size);
4050 if (buf == NULL) goto mem_error;
4051
4052 /*
4053 * OK loop until we reach one of the ending char or a size limit.
4054 */
4055 c = CUR_CHAR(l);
4056 while (((NXT(0) != limit) && /* checked */
4057 (IS_CHAR(c)) && (c != '<')) &&
4058 (ctxt->instate != XML_PARSER_EOF)) {
4059 /*
4060 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4061 * special option is given
4062 */
4063 if ((len > XML_MAX_TEXT_LENGTH) &&
4064 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4066 "AttValue length too long\n");
4067 goto mem_error;
4068 }
4069 if (c == 0) break;
4070 if (c == '&') {
4071 in_space = 0;
4072 if (NXT(1) == '#') {
4073 int val = xmlParseCharRef(ctxt);
4074
4075 if (val == '&') {
4076 if (ctxt->replaceEntities) {
4077 if (len + 10 > buf_size) {
4078 growBuffer(buf, 10);
4079 }
4080 buf[len++] = '&';
4081 } else {
4082 /*
4083 * The reparsing will be done in xmlStringGetNodeList()
4084 * called by the attribute() function in SAX.c
4085 */
4086 if (len + 10 > buf_size) {
4087 growBuffer(buf, 10);
4088 }
4089 buf[len++] = '&';
4090 buf[len++] = '#';
4091 buf[len++] = '3';
4092 buf[len++] = '8';
4093 buf[len++] = ';';
4094 }
4095 } else if (val != 0) {
4096 if (len + 10 > buf_size) {
4097 growBuffer(buf, 10);
4098 }
4099 len += xmlCopyChar(0, &buf[len], val);
4100 }
4101 } else {
4102 ent = xmlParseEntityRef(ctxt);
4103 ctxt->nbentities++;
4104 if (ent != NULL)
4105 ctxt->nbentities += ent->owner;
4106 if ((ent != NULL) &&
4107 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4108 if (len + 10 > buf_size) {
4109 growBuffer(buf, 10);
4110 }
4111 if ((ctxt->replaceEntities == 0) &&
4112 (ent->content[0] == '&')) {
4113 buf[len++] = '&';
4114 buf[len++] = '#';
4115 buf[len++] = '3';
4116 buf[len++] = '8';
4117 buf[len++] = ';';
4118 } else {
4119 buf[len++] = ent->content[0];
4120 }
4121 } else if ((ent != NULL) &&
4122 (ctxt->replaceEntities != 0)) {
4123 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4124 ++ctxt->depth;
4125 rep = xmlStringDecodeEntities(ctxt, ent->content,
4126 XML_SUBSTITUTE_REF,
4127 0, 0, 0);
4128 --ctxt->depth;
4129 if (rep != NULL) {
4130 current = rep;
4131 while (*current != 0) { /* non input consuming */
4132 if ((*current == 0xD) || (*current == 0xA) ||
4133 (*current == 0x9)) {
4134 buf[len++] = 0x20;
4135 current++;
4136 } else
4137 buf[len++] = *current++;
4138 if (len + 10 > buf_size) {
4139 growBuffer(buf, 10);
4140 }
4141 }
4142 xmlFree(rep);
4143 rep = NULL;
4144 }
4145 } else {
4146 if (len + 10 > buf_size) {
4147 growBuffer(buf, 10);
4148 }
4149 if (ent->content != NULL)
4150 buf[len++] = ent->content[0];
4151 }
4152 } else if (ent != NULL) {
4153 int i = xmlStrlen(ent->name);
4154 const xmlChar *cur = ent->name;
4155
4156 /*
4157 * This may look absurd but is needed to detect
4158 * entities problems
4159 */
4160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4161 (ent->content != NULL) && (ent->checked == 0)) {
4162 unsigned long oldnbent = ctxt->nbentities;
4163
4164 ++ctxt->depth;
4165 rep = xmlStringDecodeEntities(ctxt, ent->content,
4166 XML_SUBSTITUTE_REF, 0, 0, 0);
4167 --ctxt->depth;
4168
4169 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4170 if (rep != NULL) {
4171 if (xmlStrchr(rep, '<'))
4172 ent->checked |= 1;
4173 xmlFree(rep);
4174 rep = NULL;
4175 }
4176 }
4177
4178 /*
4179 * Just output the reference
4180 */
4181 buf[len++] = '&';
4182 while (len + i + 10 > buf_size) {
4183 growBuffer(buf, i + 10);
4184 }
4185 for (;i > 0;i--)
4186 buf[len++] = *cur++;
4187 buf[len++] = ';';
4188 }
4189 }
4190 } else {
4191 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4192 if ((len != 0) || (!normalize)) {
4193 if ((!normalize) || (!in_space)) {
4194 COPY_BUF(l,buf,len,0x20);
4195 while (len + 10 > buf_size) {
4196 growBuffer(buf, 10);
4197 }
4198 }
4199 in_space = 1;
4200 }
4201 } else {
4202 in_space = 0;
4203 COPY_BUF(l,buf,len,c);
4204 if (len + 10 > buf_size) {
4205 growBuffer(buf, 10);
4206 }
4207 }
4208 NEXTL(l);
4209 }
4210 GROW;
4211 c = CUR_CHAR(l);
4212 }
4213 if (ctxt->instate == XML_PARSER_EOF)
4214 goto error;
4215
4216 if ((in_space) && (normalize)) {
4217 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4218 }
4219 buf[len] = 0;
4220 if (RAW == '<') {
4221 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4222 } else if (RAW != limit) {
4223 if ((c != 0) && (!IS_CHAR(c))) {
4224 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4225 "invalid character in attribute value\n");
4226 } else {
4227 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4228 "AttValue: ' expected\n");
4229 }
4230 } else
4231 NEXT;
4232
4233 /*
4234 * There we potentially risk an overflow, don't allow attribute value of
4235 * length more than INT_MAX it is a very reasonnable assumption !
4236 */
4237 if (len >= INT_MAX) {
4238 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4239 "AttValue length too long\n");
4240 goto mem_error;
4241 }
4242
4243 if (attlen != NULL) *attlen = (int) len;
4244 return(buf);
4245
4246 mem_error:
4247 xmlErrMemory(ctxt, NULL);
4248 error:
4249 if (buf != NULL)
4250 xmlFree(buf);
4251 if (rep != NULL)
4252 xmlFree(rep);
4253 return(NULL);
4254 }
4255
4256 /**
4257 * xmlParseAttValue:
4258 * @ctxt: an XML parser context
4259 *
4260 * parse a value for an attribute
4261 * Note: the parser won't do substitution of entities here, this
4262 * will be handled later in xmlStringGetNodeList
4263 *
4264 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4265 * "'" ([^<&'] | Reference)* "'"
4266 *
4267 * 3.3.3 Attribute-Value Normalization:
4268 * Before the value of an attribute is passed to the application or
4269 * checked for validity, the XML processor must normalize it as follows:
4270 * - a character reference is processed by appending the referenced
4271 * character to the attribute value
4272 * - an entity reference is processed by recursively processing the
4273 * replacement text of the entity
4274 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4275 * appending #x20 to the normalized value, except that only a single
4276 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4277 * parsed entity or the literal entity value of an internal parsed entity
4278 * - other characters are processed by appending them to the normalized value
4279 * If the declared value is not CDATA, then the XML processor must further
4280 * process the normalized attribute value by discarding any leading and
4281 * trailing space (#x20) characters, and by replacing sequences of space
4282 * (#x20) characters by a single space (#x20) character.
4283 * All attributes for which no declaration has been read should be treated
4284 * by a non-validating parser as if declared CDATA.
4285 *
4286 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4287 */
4288
4289
4290 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4291 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4292 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4293 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4294 }
4295
4296 /**
4297 * xmlParseSystemLiteral:
4298 * @ctxt: an XML parser context
4299 *
4300 * parse an XML Literal
4301 *
4302 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4303 *
4304 * Returns the SystemLiteral parsed or NULL
4305 */
4306
4307 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4308 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4309 xmlChar *buf = NULL;
4310 int len = 0;
4311 int size = XML_PARSER_BUFFER_SIZE;
4312 int cur, l;
4313 xmlChar stop;
4314 int state = ctxt->instate;
4315 int count = 0;
4316
4317 SHRINK;
4318 if (RAW == '"') {
4319 NEXT;
4320 stop = '"';
4321 } else if (RAW == '\'') {
4322 NEXT;
4323 stop = '\'';
4324 } else {
4325 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4326 return(NULL);
4327 }
4328
4329 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4330 if (buf == NULL) {
4331 xmlErrMemory(ctxt, NULL);
4332 return(NULL);
4333 }
4334 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4335 cur = CUR_CHAR(l);
4336 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4337 if (len + 5 >= size) {
4338 xmlChar *tmp;
4339
4340 if ((size > XML_MAX_NAME_LENGTH) &&
4341 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4342 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4343 xmlFree(buf);
4344 ctxt->instate = (xmlParserInputState) state;
4345 return(NULL);
4346 }
4347 size *= 2;
4348 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4349 if (tmp == NULL) {
4350 xmlFree(buf);
4351 xmlErrMemory(ctxt, NULL);
4352 ctxt->instate = (xmlParserInputState) state;
4353 return(NULL);
4354 }
4355 buf = tmp;
4356 }
4357 count++;
4358 if (count > 50) {
4359 GROW;
4360 count = 0;
4361 if (ctxt->instate == XML_PARSER_EOF) {
4362 xmlFree(buf);
4363 return(NULL);
4364 }
4365 }
4366 COPY_BUF(l,buf,len,cur);
4367 NEXTL(l);
4368 cur = CUR_CHAR(l);
4369 if (cur == 0) {
4370 GROW;
4371 SHRINK;
4372 cur = CUR_CHAR(l);
4373 }
4374 }
4375 buf[len] = 0;
4376 ctxt->instate = (xmlParserInputState) state;
4377 if (!IS_CHAR(cur)) {
4378 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4379 } else {
4380 NEXT;
4381 }
4382 return(buf);
4383 }
4384
4385 /**
4386 * xmlParsePubidLiteral:
4387 * @ctxt: an XML parser context
4388 *
4389 * parse an XML public literal
4390 *
4391 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4392 *
4393 * Returns the PubidLiteral parsed or NULL.
4394 */
4395
4396 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4397 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4398 xmlChar *buf = NULL;
4399 int len = 0;
4400 int size = XML_PARSER_BUFFER_SIZE;
4401 xmlChar cur;
4402 xmlChar stop;
4403 int count = 0;
4404 xmlParserInputState oldstate = ctxt->instate;
4405
4406 SHRINK;
4407 if (RAW == '"') {
4408 NEXT;
4409 stop = '"';
4410 } else if (RAW == '\'') {
4411 NEXT;
4412 stop = '\'';
4413 } else {
4414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4415 return(NULL);
4416 }
4417 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4418 if (buf == NULL) {
4419 xmlErrMemory(ctxt, NULL);
4420 return(NULL);
4421 }
4422 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4423 cur = CUR;
4424 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4425 if (len + 1 >= size) {
4426 xmlChar *tmp;
4427
4428 if ((size > XML_MAX_NAME_LENGTH) &&
4429 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4430 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4431 xmlFree(buf);
4432 return(NULL);
4433 }
4434 size *= 2;
4435 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4436 if (tmp == NULL) {
4437 xmlErrMemory(ctxt, NULL);
4438 xmlFree(buf);
4439 return(NULL);
4440 }
4441 buf = tmp;
4442 }
4443 buf[len++] = cur;
4444 count++;
4445 if (count > 50) {
4446 GROW;
4447 count = 0;
4448 if (ctxt->instate == XML_PARSER_EOF) {
4449 xmlFree(buf);
4450 return(NULL);
4451 }
4452 }
4453 NEXT;
4454 cur = CUR;
4455 if (cur == 0) {
4456 GROW;
4457 SHRINK;
4458 cur = CUR;
4459 }
4460 }
4461 buf[len] = 0;
4462 if (cur != stop) {
4463 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4464 } else {
4465 NEXT;
4466 }
4467 ctxt->instate = oldstate;
4468 return(buf);
4469 }
4470
/* Forward declaration: slow path used by xmlParseCharData() below. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing in xmlParseCharData(), indexed by input byte.
 * A non-zero entry (the byte value itself) means the byte can be skipped
 * over by that loop. Entries are zero for:
 *   - all bytes below 0x20 except 0x09 (this includes 0x0A and 0x0D,
 *     which xmlParseCharData() tests for explicitly),
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - all bytes from 0x80 up (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4510
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * Parse a CharData section.
 * If we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated loop scans the input buffer in place
 * and hands the text to the SAX callbacks without copying. If that loop
 * stops on a byte it does not handle, or if @cdata is non-zero, the work
 * is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position of the last point delivered to SAX, restored on fallback */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* only spaces/line feeds were seen before the markup */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * Distinct callbacks: let areBlanks() decide which
                     * one receives the run.
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* skip every byte flagged as plain data in test_char_data */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what has been scanned so far */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: cur is moved onto the LF (dropping the
                     * CR from the next run) and scanning resumes after it.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back, it is not handled by this loop */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* SHRINK/GROW may have changed the buffer: reload the cursor */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* fall back to the generic routine from the last delivered position */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4668
4669 /**
4670 * xmlParseCharDataComplex:
4671 * @ctxt: an XML parser context
4672 * @cdata: int indicating whether we are within a CDATA section
4673 *
4674 * parse a CharData section.this is the fallback function
4675 * of xmlParseCharData() when the parsing requires handling
4676 * of non-ASCII characters.
4677 */
4678 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4679 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4680 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4681 int nbchar = 0;
4682 int cur, l;
4683 int count = 0;
4684
4685 SHRINK;
4686 GROW;
4687 cur = CUR_CHAR(l);
4688 while ((cur != '<') && /* checked */
4689 (cur != '&') &&
4690 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4691 if ((cur == ']') && (NXT(1) == ']') &&
4692 (NXT(2) == '>')) {
4693 if (cdata) break;
4694 else {
4695 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4696 }
4697 }
4698 COPY_BUF(l,buf,nbchar,cur);
4699 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4700 buf[nbchar] = 0;
4701
4702 /*
4703 * OK the segment is to be consumed as chars.
4704 */
4705 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4706 if (areBlanks(ctxt, buf, nbchar, 0)) {
4707 if (ctxt->sax->ignorableWhitespace != NULL)
4708 ctxt->sax->ignorableWhitespace(ctxt->userData,
4709 buf, nbchar);
4710 } else {
4711 if (ctxt->sax->characters != NULL)
4712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4713 if ((ctxt->sax->characters !=
4714 ctxt->sax->ignorableWhitespace) &&
4715 (*ctxt->space == -1))
4716 *ctxt->space = -2;
4717 }
4718 }
4719 nbchar = 0;
4720 /* something really bad happened in the SAX callback */
4721 if (ctxt->instate != XML_PARSER_CONTENT)
4722 return;
4723 }
4724 count++;
4725 if (count > 50) {
4726 GROW;
4727 count = 0;
4728 if (ctxt->instate == XML_PARSER_EOF)
4729 return;
4730 }
4731 NEXTL(l);
4732 cur = CUR_CHAR(l);
4733 }
4734 if (nbchar != 0) {
4735 buf[nbchar] = 0;
4736 /*
4737 * OK the segment is to be consumed as chars.
4738 */
4739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4740 if (areBlanks(ctxt, buf, nbchar, 0)) {
4741 if (ctxt->sax->ignorableWhitespace != NULL)
4742 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4743 } else {
4744 if (ctxt->sax->characters != NULL)
4745 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4746 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4747 (*ctxt->space == -1))
4748 *ctxt->space = -2;
4749 }
4750 }
4751 }
4752 if ((cur != 0) && (!IS_CHAR(cur))) {
4753 /* Generate the error and skip the offending character */
4754 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4755 "PCDATA invalid Char value %d\n",
4756 cur);
4757 NEXTL(l);
4758 }
4759 }
4760
4761 /**
4762 * xmlParseExternalID:
4763 * @ctxt: an XML parser context
4764 * @publicID: a xmlChar** receiving PubidLiteral
4765 * @strict: indicate whether we should restrict parsing to only
4766 * production [75], see NOTE below
4767 *
4768 * Parse an External ID or a Public ID
4769 *
4770 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4771 * 'PUBLIC' S PubidLiteral S SystemLiteral
4772 *
4773 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4774 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4775 *
4776 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4777 *
4778 * Returns the function returns SystemLiteral and in the second
4779 * case publicID receives PubidLiteral, is strict is off
4780 * it is possible to return NULL and have publicID set.
4781 */
4782
4783 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4784 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4785 xmlChar *URI = NULL;
4786
4787 SHRINK;
4788
4789 *publicID = NULL;
4790 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4791 SKIP(6);
4792 if (!IS_BLANK_CH(CUR)) {
4793 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4794 "Space required after 'SYSTEM'\n");
4795 }
4796 SKIP_BLANKS;
4797 URI = xmlParseSystemLiteral(ctxt);
4798 if (URI == NULL) {
4799 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4800 }
4801 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4802 SKIP(6);
4803 if (!IS_BLANK_CH(CUR)) {
4804 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4805 "Space required after 'PUBLIC'\n");
4806 }
4807 SKIP_BLANKS;
4808 *publicID = xmlParsePubidLiteral(ctxt);
4809 if (*publicID == NULL) {
4810 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4811 }
4812 if (strict) {
4813 /*
4814 * We don't handle [83] so "S SystemLiteral" is required.
4815 */
4816 if (!IS_BLANK_CH(CUR)) {
4817 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4818 "Space required after the Public Identifier\n");
4819 }
4820 } else {
4821 /*
4822 * We handle [83] so we return immediately, if
4823 * "S SystemLiteral" is not detected. From a purely parsing
4824 * point of view that's a nice mess.
4825 */
4826 const xmlChar *ptr;
4827 GROW;
4828
4829 ptr = CUR_PTR;
4830 if (!IS_BLANK_CH(*ptr)) return(NULL);
4831
4832 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4833 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4834 }
4835 SKIP_BLANKS;
4836 URI = xmlParseSystemLiteral(ctxt);
4837 if (URI == NULL) {
4838 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4839 }
4840 }
4841 return(URI);
4842 }
4843
4844 /**
4845 * xmlParseCommentComplex:
4846 * @ctxt: an XML parser context
4847 * @buf: the already parsed part of the buffer
4848 * @len: number of bytes filles in the buffer
4849 * @size: allocated size of the buffer
4850 *
4851 * Skip an XML (SGML) comment <!-- .... -->
4852 * The spec says that "For compatibility, the string "--" (double-hyphen)
4853 * must not occur within comments. "
4854 * This is the slow routine in case the accelerator for ascii didn't work
4855 *
4856 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4857 */
4858 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4859 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4860 size_t len, size_t size) {
4861 int q, ql;
4862 int r, rl;
4863 int cur, l;
4864 size_t count = 0;
4865 int inputid;
4866
4867 inputid = ctxt->input->id;
4868
4869 if (buf == NULL) {
4870 len = 0;
4871 size = XML_PARSER_BUFFER_SIZE;
4872 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4873 if (buf == NULL) {
4874 xmlErrMemory(ctxt, NULL);
4875 return;
4876 }
4877 }
4878 GROW; /* Assure there's enough input data */
4879 q = CUR_CHAR(ql);
4880 if (q == 0)
4881 goto not_terminated;
4882 if (!IS_CHAR(q)) {
4883 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4884 "xmlParseComment: invalid xmlChar value %d\n",
4885 q);
4886 xmlFree (buf);
4887 return;
4888 }
4889 NEXTL(ql);
4890 r = CUR_CHAR(rl);
4891 if (r == 0)
4892 goto not_terminated;
4893 if (!IS_CHAR(r)) {
4894 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4895 "xmlParseComment: invalid xmlChar value %d\n",
4896 q);
4897 xmlFree (buf);
4898 return;
4899 }
4900 NEXTL(rl);
4901 cur = CUR_CHAR(l);
4902 if (cur == 0)
4903 goto not_terminated;
4904 while (IS_CHAR(cur) && /* checked */
4905 ((cur != '>') ||
4906 (r != '-') || (q != '-'))) {
4907 if ((r == '-') && (q == '-')) {
4908 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4909 }
4910 if ((len > XML_MAX_TEXT_LENGTH) &&
4911 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4912 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4913 "Comment too big found", NULL);
4914 xmlFree (buf);
4915 return;
4916 }
4917 if (len + 5 >= size) {
4918 xmlChar *new_buf;
4919 size_t new_size;
4920
4921 new_size = size * 2;
4922 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4923 if (new_buf == NULL) {
4924 xmlFree (buf);
4925 xmlErrMemory(ctxt, NULL);
4926 return;
4927 }
4928 buf = new_buf;
4929 size = new_size;
4930 }
4931 COPY_BUF(ql,buf,len,q);
4932 q = r;
4933 ql = rl;
4934 r = cur;
4935 rl = l;
4936
4937 count++;
4938 if (count > 50) {
4939 GROW;
4940 count = 0;
4941 if (ctxt->instate == XML_PARSER_EOF) {
4942 xmlFree(buf);
4943 return;
4944 }
4945 }
4946 NEXTL(l);
4947 cur = CUR_CHAR(l);
4948 if (cur == 0) {
4949 SHRINK;
4950 GROW;
4951 cur = CUR_CHAR(l);
4952 }
4953 }
4954 buf[len] = 0;
4955 if (cur == 0) {
4956 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4957 "Comment not terminated \n<!--%.50s\n", buf);
4958 } else if (!IS_CHAR(cur)) {
4959 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4960 "xmlParseComment: invalid xmlChar value %d\n",
4961 cur);
4962 } else {
4963 if (inputid != ctxt->input->id) {
4964 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4965 "Comment doesn't start and stop in the same entity\n");
4966 }
4967 NEXT;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4969 (!ctxt->disableSAX))
4970 ctxt->sax->comment(ctxt->userData, buf);
4971 }
4972 xmlFree(buf);
4973 return;
4974 not_terminated:
4975 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4976 "Comment not terminated\n", NULL);
4977 xmlFree(buf);
4978 return;
4979 }
4980
4981 /**
4982 * xmlParseComment:
4983 * @ctxt: an XML parser context
4984 *
4985 * Skip an XML (SGML) comment <!-- .... -->
4986 * The spec says that "For compatibility, the string "--" (double-hyphen)
4987 * must not occur within comments. "
4988 *
4989 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4990 */
4991 void
xmlParseComment(xmlParserCtxtPtr ctxt)4992 xmlParseComment(xmlParserCtxtPtr ctxt) {
4993 xmlChar *buf = NULL;
4994 size_t size = XML_PARSER_BUFFER_SIZE;
4995 size_t len = 0;
4996 xmlParserInputState state;
4997 const xmlChar *in;
4998 size_t nbchar = 0;
4999 int ccol;
5000 int inputid;
5001
5002 /*
5003 * Check that there is a comment right here.
5004 */
5005 if ((RAW != '<') || (NXT(1) != '!') ||
5006 (NXT(2) != '-') || (NXT(3) != '-')) return;
5007 state = ctxt->instate;
5008 ctxt->instate = XML_PARSER_COMMENT;
5009 inputid = ctxt->input->id;
5010 SKIP(4);
5011 SHRINK;
5012 GROW;
5013
5014 /*
5015 * Accelerated common case where input don't need to be
5016 * modified before passing it to the handler.
5017 */
5018 in = ctxt->input->cur;
5019 do {
5020 if (*in == 0xA) {
5021 do {
5022 ctxt->input->line++; ctxt->input->col = 1;
5023 in++;
5024 } while (*in == 0xA);
5025 }
5026 get_more:
5027 ccol = ctxt->input->col;
5028 while (((*in > '-') && (*in <= 0x7F)) ||
5029 ((*in >= 0x20) && (*in < '-')) ||
5030 (*in == 0x09)) {
5031 in++;
5032 ccol++;
5033 }
5034 ctxt->input->col = ccol;
5035 if (*in == 0xA) {
5036 do {
5037 ctxt->input->line++; ctxt->input->col = 1;
5038 in++;
5039 } while (*in == 0xA);
5040 goto get_more;
5041 }
5042 nbchar = in - ctxt->input->cur;
5043 /*
5044 * save current set of data
5045 */
5046 if (nbchar > 0) {
5047 if ((ctxt->sax != NULL) &&
5048 (ctxt->sax->comment != NULL)) {
5049 if (buf == NULL) {
5050 if ((*in == '-') && (in[1] == '-'))
5051 size = nbchar + 1;
5052 else
5053 size = XML_PARSER_BUFFER_SIZE + nbchar;
5054 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5055 if (buf == NULL) {
5056 xmlErrMemory(ctxt, NULL);
5057 ctxt->instate = state;
5058 return;
5059 }
5060 len = 0;
5061 } else if (len + nbchar + 1 >= size) {
5062 xmlChar *new_buf;
5063 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5064 new_buf = (xmlChar *) xmlRealloc(buf,
5065 size * sizeof(xmlChar));
5066 if (new_buf == NULL) {
5067 xmlFree (buf);
5068 xmlErrMemory(ctxt, NULL);
5069 ctxt->instate = state;
5070 return;
5071 }
5072 buf = new_buf;
5073 }
5074 memcpy(&buf[len], ctxt->input->cur, nbchar);
5075 len += nbchar;
5076 buf[len] = 0;
5077 }
5078 }
5079 if ((len > XML_MAX_TEXT_LENGTH) &&
5080 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5081 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5082 "Comment too big found", NULL);
5083 xmlFree (buf);
5084 return;
5085 }
5086 ctxt->input->cur = in;
5087 if (*in == 0xA) {
5088 in++;
5089 ctxt->input->line++; ctxt->input->col = 1;
5090 }
5091 if (*in == 0xD) {
5092 in++;
5093 if (*in == 0xA) {
5094 ctxt->input->cur = in;
5095 in++;
5096 ctxt->input->line++; ctxt->input->col = 1;
5097 continue; /* while */
5098 }
5099 in--;
5100 }
5101 SHRINK;
5102 GROW;
5103 if (ctxt->instate == XML_PARSER_EOF) {
5104 xmlFree(buf);
5105 return;
5106 }
5107 in = ctxt->input->cur;
5108 if (*in == '-') {
5109 if (in[1] == '-') {
5110 if (in[2] == '>') {
5111 if (ctxt->input->id != inputid) {
5112 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5113 "comment doesn't start and stop in the same entity\n");
5114 }
5115 SKIP(3);
5116 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5117 (!ctxt->disableSAX)) {
5118 if (buf != NULL)
5119 ctxt->sax->comment(ctxt->userData, buf);
5120 else
5121 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5122 }
5123 if (buf != NULL)
5124 xmlFree(buf);
5125 if (ctxt->instate != XML_PARSER_EOF)
5126 ctxt->instate = state;
5127 return;
5128 }
5129 if (buf != NULL) {
5130 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5131 "Double hyphen within comment: "
5132 "<!--%.50s\n",
5133 buf);
5134 } else
5135 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5136 "Double hyphen within comment\n", NULL);
5137 in++;
5138 ctxt->input->col++;
5139 }
5140 in++;
5141 ctxt->input->col++;
5142 goto get_more;
5143 }
5144 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5145 xmlParseCommentComplex(ctxt, buf, len, size);
5146 ctxt->instate = state;
5147 return;
5148 }
5149
5150
5151 /**
5152 * xmlParsePITarget:
5153 * @ctxt: an XML parser context
5154 *
5155 * parse the name of a PI
5156 *
5157 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5158 *
5159 * Returns the PITarget name or NULL
5160 */
5161
5162 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5163 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5164 const xmlChar *name;
5165
5166 name = xmlParseName(ctxt);
5167 if ((name != NULL) &&
5168 ((name[0] == 'x') || (name[0] == 'X')) &&
5169 ((name[1] == 'm') || (name[1] == 'M')) &&
5170 ((name[2] == 'l') || (name[2] == 'L'))) {
5171 int i;
5172 if ((name[0] == 'x') && (name[1] == 'm') &&
5173 (name[2] == 'l') && (name[3] == 0)) {
5174 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5175 "XML declaration allowed only at the start of the document\n");
5176 return(name);
5177 } else if (name[3] == 0) {
5178 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5179 return(name);
5180 }
5181 for (i = 0;;i++) {
5182 if (xmlW3CPIs[i] == NULL) break;
5183 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5184 return(name);
5185 }
5186 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5187 "xmlParsePITarget: invalid name prefix 'xml'\n",
5188 NULL, NULL);
5189 }
5190 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5191 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5192 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5193 }
5194 return(name);
5195 }
5196
5197 #ifdef LIBXML_CATALOG_ENABLED
5198 /**
5199 * xmlParseCatalogPI:
5200 * @ctxt: an XML parser context
5201 * @catalog: the PI value string
5202 *
5203 * parse an XML Catalog Processing Instruction.
5204 *
5205 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5206 *
5207 * Occurs only if allowed by the user and if happening in the Misc
5208 * part of the document before any doctype informations
5209 * This will add the given catalog to the parsing context in order
5210 * to be used if there is a resolution need further down in the document
5211 */
5212
5213 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5214 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5215 xmlChar *URL = NULL;
5216 const xmlChar *tmp, *base;
5217 xmlChar marker;
5218
5219 tmp = catalog;
5220 while (IS_BLANK_CH(*tmp)) tmp++;
5221 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5222 goto error;
5223 tmp += 7;
5224 while (IS_BLANK_CH(*tmp)) tmp++;
5225 if (*tmp != '=') {
5226 return;
5227 }
5228 tmp++;
5229 while (IS_BLANK_CH(*tmp)) tmp++;
5230 marker = *tmp;
5231 if ((marker != '\'') && (marker != '"'))
5232 goto error;
5233 tmp++;
5234 base = tmp;
5235 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5236 if (*tmp == 0)
5237 goto error;
5238 URL = xmlStrndup(base, tmp - base);
5239 tmp++;
5240 while (IS_BLANK_CH(*tmp)) tmp++;
5241 if (*tmp != 0)
5242 goto error;
5243
5244 if (URL != NULL) {
5245 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5246 xmlFree(URL);
5247 }
5248 return;
5249
5250 error:
5251 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5252 "Catalog PI syntax error: %s\n",
5253 catalog, NULL);
5254 if (URL != NULL)
5255 xmlFree(URL);
5256 }
5257 #endif
5258
5259 /**
5260 * xmlParsePI:
5261 * @ctxt: an XML parser context
5262 *
5263 * parse an XML Processing Instruction.
5264 *
5265 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5266 *
5267 * The processing is transfered to SAX once parsed.
5268 */
5269
5270 void
xmlParsePI(xmlParserCtxtPtr ctxt)5271 xmlParsePI(xmlParserCtxtPtr ctxt) {
5272 xmlChar *buf = NULL;
5273 size_t len = 0;
5274 size_t size = XML_PARSER_BUFFER_SIZE;
5275 int cur, l;
5276 const xmlChar *target;
5277 xmlParserInputState state;
5278 int count = 0;
5279
5280 if ((RAW == '<') && (NXT(1) == '?')) {
5281 xmlParserInputPtr input = ctxt->input;
5282 state = ctxt->instate;
5283 ctxt->instate = XML_PARSER_PI;
5284 /*
5285 * this is a Processing Instruction.
5286 */
5287 SKIP(2);
5288 SHRINK;
5289
5290 /*
5291 * Parse the target name and check for special support like
5292 * namespace.
5293 */
5294 target = xmlParsePITarget(ctxt);
5295 if (target != NULL) {
5296 if ((RAW == '?') && (NXT(1) == '>')) {
5297 if (input != ctxt->input) {
5298 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299 "PI declaration doesn't start and stop in the same entity\n");
5300 }
5301 SKIP(2);
5302
5303 /*
5304 * SAX: PI detected.
5305 */
5306 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5307 (ctxt->sax->processingInstruction != NULL))
5308 ctxt->sax->processingInstruction(ctxt->userData,
5309 target, NULL);
5310 if (ctxt->instate != XML_PARSER_EOF)
5311 ctxt->instate = state;
5312 return;
5313 }
5314 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5315 if (buf == NULL) {
5316 xmlErrMemory(ctxt, NULL);
5317 ctxt->instate = state;
5318 return;
5319 }
5320 cur = CUR;
5321 if (!IS_BLANK(cur)) {
5322 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5323 "ParsePI: PI %s space expected\n", target);
5324 }
5325 SKIP_BLANKS;
5326 cur = CUR_CHAR(l);
5327 while (IS_CHAR(cur) && /* checked */
5328 ((cur != '?') || (NXT(1) != '>'))) {
5329 if (len + 5 >= size) {
5330 xmlChar *tmp;
5331 size_t new_size = size * 2;
5332 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5333 if (tmp == NULL) {
5334 xmlErrMemory(ctxt, NULL);
5335 xmlFree(buf);
5336 ctxt->instate = state;
5337 return;
5338 }
5339 buf = tmp;
5340 size = new_size;
5341 }
5342 count++;
5343 if (count > 50) {
5344 GROW;
5345 if (ctxt->instate == XML_PARSER_EOF) {
5346 xmlFree(buf);
5347 return;
5348 }
5349 count = 0;
5350 if ((len > XML_MAX_TEXT_LENGTH) &&
5351 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5352 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5353 "PI %s too big found", target);
5354 xmlFree(buf);
5355 ctxt->instate = state;
5356 return;
5357 }
5358 }
5359 COPY_BUF(l,buf,len,cur);
5360 NEXTL(l);
5361 cur = CUR_CHAR(l);
5362 if (cur == 0) {
5363 SHRINK;
5364 GROW;
5365 cur = CUR_CHAR(l);
5366 }
5367 }
5368 if ((len > XML_MAX_TEXT_LENGTH) &&
5369 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5370 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5371 "PI %s too big found", target);
5372 xmlFree(buf);
5373 ctxt->instate = state;
5374 return;
5375 }
5376 buf[len] = 0;
5377 if (cur != '?') {
5378 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5379 "ParsePI: PI %s never end ...\n", target);
5380 } else {
5381 if (input != ctxt->input) {
5382 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5383 "PI declaration doesn't start and stop in the same entity\n");
5384 }
5385 SKIP(2);
5386
5387 #ifdef LIBXML_CATALOG_ENABLED
5388 if (((state == XML_PARSER_MISC) ||
5389 (state == XML_PARSER_START)) &&
5390 (xmlStrEqual(target, XML_CATALOG_PI))) {
5391 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5392 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5393 (allow == XML_CATA_ALLOW_ALL))
5394 xmlParseCatalogPI(ctxt, buf);
5395 }
5396 #endif
5397
5398
5399 /*
5400 * SAX: PI detected.
5401 */
5402 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5403 (ctxt->sax->processingInstruction != NULL))
5404 ctxt->sax->processingInstruction(ctxt->userData,
5405 target, buf);
5406 }
5407 xmlFree(buf);
5408 } else {
5409 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5410 }
5411 if (ctxt->instate != XML_PARSER_EOF)
5412 ctxt->instate = state;
5413 }
5414 }
5415
5416 /**
5417 * xmlParseNotationDecl:
5418 * @ctxt: an XML parser context
5419 *
5420 * parse a notation declaration
5421 *
5422 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5423 *
5424 * Hence there is actually 3 choices:
5425 * 'PUBLIC' S PubidLiteral
5426 * 'PUBLIC' S PubidLiteral S SystemLiteral
5427 * and 'SYSTEM' S SystemLiteral
5428 *
5429 * See the NOTE on xmlParseExternalID().
5430 */
5431
5432 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5433 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5434 const xmlChar *name;
5435 xmlChar *Pubid;
5436 xmlChar *Systemid;
5437
5438 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5439 xmlParserInputPtr input = ctxt->input;
5440 SHRINK;
5441 SKIP(10);
5442 if (!IS_BLANK_CH(CUR)) {
5443 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5444 "Space required after '<!NOTATION'\n");
5445 return;
5446 }
5447 SKIP_BLANKS;
5448
5449 name = xmlParseName(ctxt);
5450 if (name == NULL) {
5451 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5452 return;
5453 }
5454 if (!IS_BLANK_CH(CUR)) {
5455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456 "Space required after the NOTATION name'\n");
5457 return;
5458 }
5459 if (xmlStrchr(name, ':') != NULL) {
5460 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5461 "colons are forbidden from notation names '%s'\n",
5462 name, NULL, NULL);
5463 }
5464 SKIP_BLANKS;
5465
5466 /*
5467 * Parse the IDs.
5468 */
5469 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5470 SKIP_BLANKS;
5471
5472 if (RAW == '>') {
5473 if (input != ctxt->input) {
5474 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5475 "Notation declaration doesn't start and stop in the same entity\n");
5476 }
5477 NEXT;
5478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5479 (ctxt->sax->notationDecl != NULL))
5480 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5481 } else {
5482 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5483 }
5484 if (Systemid != NULL) xmlFree(Systemid);
5485 if (Pubid != NULL) xmlFree(Pubid);
5486 }
5487 }
5488
5489 /**
5490 * xmlParseEntityDecl:
5491 * @ctxt: an XML parser context
5492 *
5493 * parse <!ENTITY declarations
5494 *
5495 * [70] EntityDecl ::= GEDecl | PEDecl
5496 *
5497 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5498 *
5499 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5500 *
5501 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5502 *
5503 * [74] PEDef ::= EntityValue | ExternalID
5504 *
5505 * [76] NDataDecl ::= S 'NDATA' S Name
5506 *
5507 * [ VC: Notation Declared ]
5508 * The Name must match the declared name of a notation.
5509 */
5510
5511 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5512 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5513 const xmlChar *name = NULL;
5514 xmlChar *value = NULL;
5515 xmlChar *URI = NULL, *literal = NULL;
5516 const xmlChar *ndata = NULL;
5517 int isParameter = 0;
5518 xmlChar *orig = NULL;
5519 int skipped;
5520
5521 /* GROW; done in the caller */
5522 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5523 xmlParserInputPtr input = ctxt->input;
5524 SHRINK;
5525 SKIP(8);
5526 skipped = SKIP_BLANKS;
5527 if (skipped == 0) {
5528 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5529 "Space required after '<!ENTITY'\n");
5530 }
5531
5532 if (RAW == '%') {
5533 NEXT;
5534 skipped = SKIP_BLANKS;
5535 if (skipped == 0) {
5536 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5537 "Space required after '%%'\n");
5538 }
5539 isParameter = 1;
5540 }
5541
5542 name = xmlParseName(ctxt);
5543 if (name == NULL) {
5544 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5545 "xmlParseEntityDecl: no name\n");
5546 return;
5547 }
5548 if (xmlStrchr(name, ':') != NULL) {
5549 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5550 "colons are forbidden from entities names '%s'\n",
5551 name, NULL, NULL);
5552 }
5553 skipped = SKIP_BLANKS;
5554 if (skipped == 0) {
5555 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5556 "Space required after the entity name\n");
5557 }
5558
5559 ctxt->instate = XML_PARSER_ENTITY_DECL;
5560 /*
5561 * handle the various case of definitions...
5562 */
5563 if (isParameter) {
5564 if ((RAW == '"') || (RAW == '\'')) {
5565 value = xmlParseEntityValue(ctxt, &orig);
5566 if (value) {
5567 if ((ctxt->sax != NULL) &&
5568 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5569 ctxt->sax->entityDecl(ctxt->userData, name,
5570 XML_INTERNAL_PARAMETER_ENTITY,
5571 NULL, NULL, value);
5572 }
5573 } else {
5574 URI = xmlParseExternalID(ctxt, &literal, 1);
5575 if ((URI == NULL) && (literal == NULL)) {
5576 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5577 }
5578 if (URI) {
5579 xmlURIPtr uri;
5580
5581 uri = xmlParseURI((const char *) URI);
5582 if (uri == NULL) {
5583 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5584 "Invalid URI: %s\n", URI);
5585 /*
5586 * This really ought to be a well formedness error
5587 * but the XML Core WG decided otherwise c.f. issue
5588 * E26 of the XML erratas.
5589 */
5590 } else {
5591 if (uri->fragment != NULL) {
5592 /*
5593 * Okay this is foolish to block those but not
5594 * invalid URIs.
5595 */
5596 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5597 } else {
5598 if ((ctxt->sax != NULL) &&
5599 (!ctxt->disableSAX) &&
5600 (ctxt->sax->entityDecl != NULL))
5601 ctxt->sax->entityDecl(ctxt->userData, name,
5602 XML_EXTERNAL_PARAMETER_ENTITY,
5603 literal, URI, NULL);
5604 }
5605 xmlFreeURI(uri);
5606 }
5607 }
5608 }
5609 } else {
5610 if ((RAW == '"') || (RAW == '\'')) {
5611 value = xmlParseEntityValue(ctxt, &orig);
5612 if ((ctxt->sax != NULL) &&
5613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5614 ctxt->sax->entityDecl(ctxt->userData, name,
5615 XML_INTERNAL_GENERAL_ENTITY,
5616 NULL, NULL, value);
5617 /*
5618 * For expat compatibility in SAX mode.
5619 */
5620 if ((ctxt->myDoc == NULL) ||
5621 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5622 if (ctxt->myDoc == NULL) {
5623 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5624 if (ctxt->myDoc == NULL) {
5625 xmlErrMemory(ctxt, "New Doc failed");
5626 return;
5627 }
5628 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5629 }
5630 if (ctxt->myDoc->intSubset == NULL)
5631 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5632 BAD_CAST "fake", NULL, NULL);
5633
5634 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5635 NULL, NULL, value);
5636 }
5637 } else {
5638 URI = xmlParseExternalID(ctxt, &literal, 1);
5639 if ((URI == NULL) && (literal == NULL)) {
5640 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5641 }
5642 if (URI) {
5643 xmlURIPtr uri;
5644
5645 uri = xmlParseURI((const char *)URI);
5646 if (uri == NULL) {
5647 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5648 "Invalid URI: %s\n", URI);
5649 /*
5650 * This really ought to be a well formedness error
5651 * but the XML Core WG decided otherwise c.f. issue
5652 * E26 of the XML erratas.
5653 */
5654 } else {
5655 if (uri->fragment != NULL) {
5656 /*
5657 * Okay this is foolish to block those but not
5658 * invalid URIs.
5659 */
5660 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5661 }
5662 xmlFreeURI(uri);
5663 }
5664 }
5665 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5667 "Space required before 'NDATA'\n");
5668 }
5669 SKIP_BLANKS;
5670 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5671 SKIP(5);
5672 if (!IS_BLANK_CH(CUR)) {
5673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674 "Space required after 'NDATA'\n");
5675 }
5676 SKIP_BLANKS;
5677 ndata = xmlParseName(ctxt);
5678 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5679 (ctxt->sax->unparsedEntityDecl != NULL))
5680 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5681 literal, URI, ndata);
5682 } else {
5683 if ((ctxt->sax != NULL) &&
5684 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5685 ctxt->sax->entityDecl(ctxt->userData, name,
5686 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5687 literal, URI, NULL);
5688 /*
5689 * For expat compatibility in SAX mode.
5690 * assuming the entity repalcement was asked for
5691 */
5692 if ((ctxt->replaceEntities != 0) &&
5693 ((ctxt->myDoc == NULL) ||
5694 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5695 if (ctxt->myDoc == NULL) {
5696 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5697 if (ctxt->myDoc == NULL) {
5698 xmlErrMemory(ctxt, "New Doc failed");
5699 return;
5700 }
5701 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5702 }
5703
5704 if (ctxt->myDoc->intSubset == NULL)
5705 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5706 BAD_CAST "fake", NULL, NULL);
5707 xmlSAX2EntityDecl(ctxt, name,
5708 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5709 literal, URI, NULL);
5710 }
5711 }
5712 }
5713 }
5714 if (ctxt->instate == XML_PARSER_EOF)
5715 return;
5716 SKIP_BLANKS;
5717 if (RAW != '>') {
5718 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5719 "xmlParseEntityDecl: entity %s not terminated\n", name);
5720 xmlHaltParser(ctxt);
5721 } else {
5722 if (input != ctxt->input) {
5723 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5724 "Entity declaration doesn't start and stop in the same entity\n");
5725 }
5726 NEXT;
5727 }
5728 if (orig != NULL) {
5729 /*
5730 * Ugly mechanism to save the raw entity value.
5731 */
5732 xmlEntityPtr cur = NULL;
5733
5734 if (isParameter) {
5735 if ((ctxt->sax != NULL) &&
5736 (ctxt->sax->getParameterEntity != NULL))
5737 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5738 } else {
5739 if ((ctxt->sax != NULL) &&
5740 (ctxt->sax->getEntity != NULL))
5741 cur = ctxt->sax->getEntity(ctxt->userData, name);
5742 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5743 cur = xmlSAX2GetEntity(ctxt, name);
5744 }
5745 }
5746 if (cur != NULL) {
5747 if (cur->orig != NULL)
5748 xmlFree(orig);
5749 else
5750 cur->orig = orig;
5751 } else
5752 xmlFree(orig);
5753 }
5754 if (value != NULL) xmlFree(value);
5755 if (URI != NULL) xmlFree(URI);
5756 if (literal != NULL) xmlFree(literal);
5757 }
5758 }
5759
5760 /**
5761 * xmlParseDefaultDecl:
5762 * @ctxt: an XML parser context
5763 * @value: Receive a possible fixed default value for the attribute
5764 *
5765 * Parse an attribute default declaration
5766 *
5767 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5768 *
5769 * [ VC: Required Attribute ]
5770 * if the default declaration is the keyword #REQUIRED, then the
5771 * attribute must be specified for all elements of the type in the
5772 * attribute-list declaration.
5773 *
5774 * [ VC: Attribute Default Legal ]
5775 * The declared default value must meet the lexical constraints of
5776 * the declared attribute type c.f. xmlValidateAttributeDecl()
5777 *
5778 * [ VC: Fixed Attribute Default ]
5779 * if an attribute has a default value declared with the #FIXED
5780 * keyword, instances of that attribute must match the default value.
5781 *
5782 * [ WFC: No < in Attribute Values ]
5783 * handled in xmlParseAttValue()
5784 *
5785 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5786 * or XML_ATTRIBUTE_FIXED.
5787 */
5788
5789 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5790 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5791 int val;
5792 xmlChar *ret;
5793
5794 *value = NULL;
5795 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5796 SKIP(9);
5797 return(XML_ATTRIBUTE_REQUIRED);
5798 }
5799 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5800 SKIP(8);
5801 return(XML_ATTRIBUTE_IMPLIED);
5802 }
5803 val = XML_ATTRIBUTE_NONE;
5804 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5805 SKIP(6);
5806 val = XML_ATTRIBUTE_FIXED;
5807 if (!IS_BLANK_CH(CUR)) {
5808 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5809 "Space required after '#FIXED'\n");
5810 }
5811 SKIP_BLANKS;
5812 }
5813 ret = xmlParseAttValue(ctxt);
5814 ctxt->instate = XML_PARSER_DTD;
5815 if (ret == NULL) {
5816 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5817 "Attribute default value declaration error\n");
5818 } else
5819 *value = ret;
5820 return(val);
5821 }
5822
5823 /**
5824 * xmlParseNotationType:
5825 * @ctxt: an XML parser context
5826 *
 * parse a Notation attribute type.
 *
 * Note: the leading 'NOTATION' S part has already been parsed...
5830 *
5831 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5832 *
5833 * [ VC: Notation Attributes ]
5834 * Values of this type must match one of the notation names included
5835 * in the declaration; all notation names in the declaration must be declared.
5836 *
5837 * Returns: the notation attribute tree built while parsing
5838 */
5839
5840 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5841 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5842 const xmlChar *name;
5843 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5844
5845 if (RAW != '(') {
5846 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5847 return(NULL);
5848 }
5849 SHRINK;
5850 do {
5851 NEXT;
5852 SKIP_BLANKS;
5853 name = xmlParseName(ctxt);
5854 if (name == NULL) {
5855 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5856 "Name expected in NOTATION declaration\n");
5857 xmlFreeEnumeration(ret);
5858 return(NULL);
5859 }
5860 tmp = ret;
5861 while (tmp != NULL) {
5862 if (xmlStrEqual(name, tmp->name)) {
5863 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5864 "standalone: attribute notation value token %s duplicated\n",
5865 name, NULL);
5866 if (!xmlDictOwns(ctxt->dict, name))
5867 xmlFree((xmlChar *) name);
5868 break;
5869 }
5870 tmp = tmp->next;
5871 }
5872 if (tmp == NULL) {
5873 cur = xmlCreateEnumeration(name);
5874 if (cur == NULL) {
5875 xmlFreeEnumeration(ret);
5876 return(NULL);
5877 }
5878 if (last == NULL) ret = last = cur;
5879 else {
5880 last->next = cur;
5881 last = cur;
5882 }
5883 }
5884 SKIP_BLANKS;
5885 } while (RAW == '|');
5886 if (RAW != ')') {
5887 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5888 xmlFreeEnumeration(ret);
5889 return(NULL);
5890 }
5891 NEXT;
5892 return(ret);
5893 }
5894
5895 /**
5896 * xmlParseEnumerationType:
5897 * @ctxt: an XML parser context
5898 *
5899 * parse an Enumeration attribute type.
5900 *
5901 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5902 *
5903 * [ VC: Enumeration ]
5904 * Values of this type must match one of the Nmtoken tokens in
5905 * the declaration
5906 *
5907 * Returns: the enumeration attribute tree built while parsing
5908 */
5909
5910 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5911 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5912 xmlChar *name;
5913 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5914
5915 if (RAW != '(') {
5916 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5917 return(NULL);
5918 }
5919 SHRINK;
5920 do {
5921 NEXT;
5922 SKIP_BLANKS;
5923 name = xmlParseNmtoken(ctxt);
5924 if (name == NULL) {
5925 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5926 return(ret);
5927 }
5928 tmp = ret;
5929 while (tmp != NULL) {
5930 if (xmlStrEqual(name, tmp->name)) {
5931 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5932 "standalone: attribute enumeration value token %s duplicated\n",
5933 name, NULL);
5934 if (!xmlDictOwns(ctxt->dict, name))
5935 xmlFree(name);
5936 break;
5937 }
5938 tmp = tmp->next;
5939 }
5940 if (tmp == NULL) {
5941 cur = xmlCreateEnumeration(name);
5942 if (!xmlDictOwns(ctxt->dict, name))
5943 xmlFree(name);
5944 if (cur == NULL) {
5945 xmlFreeEnumeration(ret);
5946 return(NULL);
5947 }
5948 if (last == NULL) ret = last = cur;
5949 else {
5950 last->next = cur;
5951 last = cur;
5952 }
5953 }
5954 SKIP_BLANKS;
5955 } while (RAW == '|');
5956 if (RAW != ')') {
5957 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5958 return(ret);
5959 }
5960 NEXT;
5961 return(ret);
5962 }
5963
5964 /**
5965 * xmlParseEnumeratedType:
5966 * @ctxt: an XML parser context
5967 * @tree: the enumeration tree built while parsing
5968 *
5969 * parse an Enumerated attribute type.
5970 *
5971 * [57] EnumeratedType ::= NotationType | Enumeration
5972 *
5973 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5974 *
5975 *
5976 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5977 */
5978
5979 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5980 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5981 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5982 SKIP(8);
5983 if (!IS_BLANK_CH(CUR)) {
5984 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5985 "Space required after 'NOTATION'\n");
5986 return(0);
5987 }
5988 SKIP_BLANKS;
5989 *tree = xmlParseNotationType(ctxt);
5990 if (*tree == NULL) return(0);
5991 return(XML_ATTRIBUTE_NOTATION);
5992 }
5993 *tree = xmlParseEnumerationType(ctxt);
5994 if (*tree == NULL) return(0);
5995 return(XML_ATTRIBUTE_ENUMERATION);
5996 }
5997
5998 /**
5999 * xmlParseAttributeType:
6000 * @ctxt: an XML parser context
6001 * @tree: the enumeration tree built while parsing
6002 *
6003 * parse the Attribute list def for an element
6004 *
6005 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6006 *
6007 * [55] StringType ::= 'CDATA'
6008 *
6009 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6010 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6011 *
6012 * Validity constraints for attribute values syntax are checked in
6013 * xmlValidateAttributeValue()
6014 *
6015 * [ VC: ID ]
6016 * Values of type ID must match the Name production. A name must not
6017 * appear more than once in an XML document as a value of this type;
6018 * i.e., ID values must uniquely identify the elements which bear them.
6019 *
6020 * [ VC: One ID per Element Type ]
6021 * No element type may have more than one ID attribute specified.
6022 *
6023 * [ VC: ID Attribute Default ]
6024 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6025 *
6026 * [ VC: IDREF ]
6027 * Values of type IDREF must match the Name production, and values
6028 * of type IDREFS must match Names; each IDREF Name must match the value
6029 * of an ID attribute on some element in the XML document; i.e. IDREF
6030 * values must match the value of some ID attribute.
6031 *
6032 * [ VC: Entity Name ]
6033 * Values of type ENTITY must match the Name production, values
6034 * of type ENTITIES must match Names; each Entity Name must match the
6035 * name of an unparsed entity declared in the DTD.
6036 *
6037 * [ VC: Name Token ]
6038 * Values of type NMTOKEN must match the Nmtoken production; values
6039 * of type NMTOKENS must match Nmtokens.
6040 *
6041 * Returns the attribute type
6042 */
6043 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6044 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6045 SHRINK;
6046 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6047 SKIP(5);
6048 return(XML_ATTRIBUTE_CDATA);
6049 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6050 SKIP(6);
6051 return(XML_ATTRIBUTE_IDREFS);
6052 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6053 SKIP(5);
6054 return(XML_ATTRIBUTE_IDREF);
6055 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6056 SKIP(2);
6057 return(XML_ATTRIBUTE_ID);
6058 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6059 SKIP(6);
6060 return(XML_ATTRIBUTE_ENTITY);
6061 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6062 SKIP(8);
6063 return(XML_ATTRIBUTE_ENTITIES);
6064 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6065 SKIP(8);
6066 return(XML_ATTRIBUTE_NMTOKENS);
6067 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6068 SKIP(7);
6069 return(XML_ATTRIBUTE_NMTOKEN);
6070 }
6071 return(xmlParseEnumeratedType(ctxt, tree));
6072 }
6073
6074 /**
6075 * xmlParseAttributeListDecl:
6076 * @ctxt: an XML parser context
6077 *
 * parse the Attribute list def for an element
6079 *
6080 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6081 *
6082 * [53] AttDef ::= S Name S AttType S DefaultDecl
6083 *
6084 */
6085 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6086 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6087 const xmlChar *elemName;
6088 const xmlChar *attrName;
6089 xmlEnumerationPtr tree;
6090
6091 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6092 xmlParserInputPtr input = ctxt->input;
6093
6094 SKIP(9);
6095 if (!IS_BLANK_CH(CUR)) {
6096 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6097 "Space required after '<!ATTLIST'\n");
6098 }
6099 SKIP_BLANKS;
6100 elemName = xmlParseName(ctxt);
6101 if (elemName == NULL) {
6102 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6103 "ATTLIST: no name for Element\n");
6104 return;
6105 }
6106 SKIP_BLANKS;
6107 GROW;
6108 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6109 const xmlChar *check = CUR_PTR;
6110 int type;
6111 int def;
6112 xmlChar *defaultValue = NULL;
6113
6114 GROW;
6115 tree = NULL;
6116 attrName = xmlParseName(ctxt);
6117 if (attrName == NULL) {
6118 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6119 "ATTLIST: no name for Attribute\n");
6120 break;
6121 }
6122 GROW;
6123 if (!IS_BLANK_CH(CUR)) {
6124 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6125 "Space required after the attribute name\n");
6126 break;
6127 }
6128 SKIP_BLANKS;
6129
6130 type = xmlParseAttributeType(ctxt, &tree);
6131 if (type <= 0) {
6132 break;
6133 }
6134
6135 GROW;
6136 if (!IS_BLANK_CH(CUR)) {
6137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6138 "Space required after the attribute type\n");
6139 if (tree != NULL)
6140 xmlFreeEnumeration(tree);
6141 break;
6142 }
6143 SKIP_BLANKS;
6144
6145 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6146 if (def <= 0) {
6147 if (defaultValue != NULL)
6148 xmlFree(defaultValue);
6149 if (tree != NULL)
6150 xmlFreeEnumeration(tree);
6151 break;
6152 }
6153 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6154 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6155
6156 GROW;
6157 if (RAW != '>') {
6158 if (!IS_BLANK_CH(CUR)) {
6159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6160 "Space required after the attribute default value\n");
6161 if (defaultValue != NULL)
6162 xmlFree(defaultValue);
6163 if (tree != NULL)
6164 xmlFreeEnumeration(tree);
6165 break;
6166 }
6167 SKIP_BLANKS;
6168 }
6169 if (check == CUR_PTR) {
6170 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6171 "in xmlParseAttributeListDecl\n");
6172 if (defaultValue != NULL)
6173 xmlFree(defaultValue);
6174 if (tree != NULL)
6175 xmlFreeEnumeration(tree);
6176 break;
6177 }
6178 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6179 (ctxt->sax->attributeDecl != NULL))
6180 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6181 type, def, defaultValue, tree);
6182 else if (tree != NULL)
6183 xmlFreeEnumeration(tree);
6184
6185 if ((ctxt->sax2) && (defaultValue != NULL) &&
6186 (def != XML_ATTRIBUTE_IMPLIED) &&
6187 (def != XML_ATTRIBUTE_REQUIRED)) {
6188 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6189 }
6190 if (ctxt->sax2) {
6191 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6192 }
6193 if (defaultValue != NULL)
6194 xmlFree(defaultValue);
6195 GROW;
6196 }
6197 if (RAW == '>') {
6198 if (input != ctxt->input) {
6199 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6200 "Attribute list declaration doesn't start and stop in the same entity\n",
6201 NULL, NULL);
6202 }
6203 NEXT;
6204 }
6205 }
6206 }
6207
6208 /**
6209 * xmlParseElementMixedContentDecl:
6210 * @ctxt: an XML parser context
6211 * @inputchk: the input used for the current entity, needed for boundary checks
6212 *
6213 * parse the declaration for a Mixed Element content
6214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6215 *
6216 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6217 * '(' S? '#PCDATA' S? ')'
6218 *
6219 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6220 *
6221 * [ VC: No Duplicate Types ]
6222 * The same name must not appear more than once in a single
6223 * mixed-content declaration.
6224 *
6225 * returns: the list of the xmlElementContentPtr describing the element choices
6226 */
6227 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6228 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6229 xmlElementContentPtr ret = NULL, cur = NULL, n;
6230 const xmlChar *elem = NULL;
6231
6232 GROW;
6233 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6234 SKIP(7);
6235 SKIP_BLANKS;
6236 SHRINK;
6237 if (RAW == ')') {
6238 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6239 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6240 "Element content declaration doesn't start and stop in the same entity\n",
6241 NULL, NULL);
6242 }
6243 NEXT;
6244 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6245 if (ret == NULL)
6246 return(NULL);
6247 if (RAW == '*') {
6248 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6249 NEXT;
6250 }
6251 return(ret);
6252 }
6253 if ((RAW == '(') || (RAW == '|')) {
6254 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6255 if (ret == NULL) return(NULL);
6256 }
6257 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6258 NEXT;
6259 if (elem == NULL) {
6260 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6261 if (ret == NULL) return(NULL);
6262 ret->c1 = cur;
6263 if (cur != NULL)
6264 cur->parent = ret;
6265 cur = ret;
6266 } else {
6267 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6268 if (n == NULL) return(NULL);
6269 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6270 if (n->c1 != NULL)
6271 n->c1->parent = n;
6272 cur->c2 = n;
6273 if (n != NULL)
6274 n->parent = cur;
6275 cur = n;
6276 }
6277 SKIP_BLANKS;
6278 elem = xmlParseName(ctxt);
6279 if (elem == NULL) {
6280 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6281 "xmlParseElementMixedContentDecl : Name expected\n");
6282 xmlFreeDocElementContent(ctxt->myDoc, cur);
6283 return(NULL);
6284 }
6285 SKIP_BLANKS;
6286 GROW;
6287 }
6288 if ((RAW == ')') && (NXT(1) == '*')) {
6289 if (elem != NULL) {
6290 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6291 XML_ELEMENT_CONTENT_ELEMENT);
6292 if (cur->c2 != NULL)
6293 cur->c2->parent = cur;
6294 }
6295 if (ret != NULL)
6296 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6297 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6298 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6299 "Element content declaration doesn't start and stop in the same entity\n",
6300 NULL, NULL);
6301 }
6302 SKIP(2);
6303 } else {
6304 xmlFreeDocElementContent(ctxt->myDoc, ret);
6305 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6306 return(NULL);
6307 }
6308
6309 } else {
6310 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6311 }
6312 return(ret);
6313 }
6314
6315 /**
6316 * xmlParseElementChildrenContentDeclPriv:
6317 * @ctxt: an XML parser context
6318 * @inputchk: the input used for the current entity, needed for boundary checks
6319 * @depth: the level of recursion
6320 *
 * parse the declaration for an Element children content
6322 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6323 *
6324 *
6325 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6326 *
6327 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6328 *
6329 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6330 *
6331 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6332 *
6333 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6334 * TODO Parameter-entity replacement text must be properly nested
6335 * with parenthesized groups. That is to say, if either of the
6336 * opening or closing parentheses in a choice, seq, or Mixed
6337 * construct is contained in the replacement text for a parameter
6338 * entity, both must be contained in the same replacement text. For
6339 * interoperability, if a parameter-entity reference appears in a
6340 * choice, seq, or Mixed construct, its replacement text should not
6341 * be empty, and neither the first nor last non-blank character of
6342 * the replacement text should be a connector (| or ,).
6343 *
6344 * Returns the tree of xmlElementContentPtr describing the element
6345 * hierarchy.
6346 */
6347 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6348 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6349 int depth) {
6350 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6351 const xmlChar *elem;
6352 xmlChar type = 0;
6353
6354 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6355 (depth > 2048)) {
6356 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6357 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6358 depth);
6359 return(NULL);
6360 }
6361 SKIP_BLANKS;
6362 GROW;
6363 if (RAW == '(') {
6364 int inputid = ctxt->input->id;
6365
6366 /* Recurse on first child */
6367 NEXT;
6368 SKIP_BLANKS;
6369 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6370 depth + 1);
6371 SKIP_BLANKS;
6372 GROW;
6373 } else {
6374 elem = xmlParseName(ctxt);
6375 if (elem == NULL) {
6376 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6377 return(NULL);
6378 }
6379 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6380 if (cur == NULL) {
6381 xmlErrMemory(ctxt, NULL);
6382 return(NULL);
6383 }
6384 GROW;
6385 if (RAW == '?') {
6386 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6387 NEXT;
6388 } else if (RAW == '*') {
6389 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6390 NEXT;
6391 } else if (RAW == '+') {
6392 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6393 NEXT;
6394 } else {
6395 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6396 }
6397 GROW;
6398 }
6399 SKIP_BLANKS;
6400 SHRINK;
6401 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6402 /*
6403 * Each loop we parse one separator and one element.
6404 */
6405 if (RAW == ',') {
6406 if (type == 0) type = CUR;
6407
6408 /*
6409 * Detect "Name | Name , Name" error
6410 */
6411 else if (type != CUR) {
6412 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6413 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6414 type);
6415 if ((last != NULL) && (last != ret))
6416 xmlFreeDocElementContent(ctxt->myDoc, last);
6417 if (ret != NULL)
6418 xmlFreeDocElementContent(ctxt->myDoc, ret);
6419 return(NULL);
6420 }
6421 NEXT;
6422
6423 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6424 if (op == NULL) {
6425 if ((last != NULL) && (last != ret))
6426 xmlFreeDocElementContent(ctxt->myDoc, last);
6427 xmlFreeDocElementContent(ctxt->myDoc, ret);
6428 return(NULL);
6429 }
6430 if (last == NULL) {
6431 op->c1 = ret;
6432 if (ret != NULL)
6433 ret->parent = op;
6434 ret = cur = op;
6435 } else {
6436 cur->c2 = op;
6437 if (op != NULL)
6438 op->parent = cur;
6439 op->c1 = last;
6440 if (last != NULL)
6441 last->parent = op;
6442 cur =op;
6443 last = NULL;
6444 }
6445 } else if (RAW == '|') {
6446 if (type == 0) type = CUR;
6447
6448 /*
6449 * Detect "Name , Name | Name" error
6450 */
6451 else if (type != CUR) {
6452 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6453 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6454 type);
6455 if ((last != NULL) && (last != ret))
6456 xmlFreeDocElementContent(ctxt->myDoc, last);
6457 if (ret != NULL)
6458 xmlFreeDocElementContent(ctxt->myDoc, ret);
6459 return(NULL);
6460 }
6461 NEXT;
6462
6463 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6464 if (op == NULL) {
6465 if ((last != NULL) && (last != ret))
6466 xmlFreeDocElementContent(ctxt->myDoc, last);
6467 if (ret != NULL)
6468 xmlFreeDocElementContent(ctxt->myDoc, ret);
6469 return(NULL);
6470 }
6471 if (last == NULL) {
6472 op->c1 = ret;
6473 if (ret != NULL)
6474 ret->parent = op;
6475 ret = cur = op;
6476 } else {
6477 cur->c2 = op;
6478 if (op != NULL)
6479 op->parent = cur;
6480 op->c1 = last;
6481 if (last != NULL)
6482 last->parent = op;
6483 cur =op;
6484 last = NULL;
6485 }
6486 } else {
6487 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6488 if ((last != NULL) && (last != ret))
6489 xmlFreeDocElementContent(ctxt->myDoc, last);
6490 if (ret != NULL)
6491 xmlFreeDocElementContent(ctxt->myDoc, ret);
6492 return(NULL);
6493 }
6494 GROW;
6495 SKIP_BLANKS;
6496 GROW;
6497 if (RAW == '(') {
6498 int inputid = ctxt->input->id;
6499 /* Recurse on second child */
6500 NEXT;
6501 SKIP_BLANKS;
6502 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6503 depth + 1);
6504 SKIP_BLANKS;
6505 } else {
6506 elem = xmlParseName(ctxt);
6507 if (elem == NULL) {
6508 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6509 if (ret != NULL)
6510 xmlFreeDocElementContent(ctxt->myDoc, ret);
6511 return(NULL);
6512 }
6513 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6514 if (last == NULL) {
6515 if (ret != NULL)
6516 xmlFreeDocElementContent(ctxt->myDoc, ret);
6517 return(NULL);
6518 }
6519 if (RAW == '?') {
6520 last->ocur = XML_ELEMENT_CONTENT_OPT;
6521 NEXT;
6522 } else if (RAW == '*') {
6523 last->ocur = XML_ELEMENT_CONTENT_MULT;
6524 NEXT;
6525 } else if (RAW == '+') {
6526 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6527 NEXT;
6528 } else {
6529 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6530 }
6531 }
6532 SKIP_BLANKS;
6533 GROW;
6534 }
6535 if ((cur != NULL) && (last != NULL)) {
6536 cur->c2 = last;
6537 if (last != NULL)
6538 last->parent = cur;
6539 }
6540 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6541 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6542 "Element content declaration doesn't start and stop in the same entity\n",
6543 NULL, NULL);
6544 }
6545 NEXT;
6546 if (RAW == '?') {
6547 if (ret != NULL) {
6548 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6549 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6550 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6551 else
6552 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6553 }
6554 NEXT;
6555 } else if (RAW == '*') {
6556 if (ret != NULL) {
6557 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6558 cur = ret;
6559 /*
6560 * Some normalization:
6561 * (a | b* | c?)* == (a | b | c)*
6562 */
6563 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6564 if ((cur->c1 != NULL) &&
6565 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6566 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6567 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6568 if ((cur->c2 != NULL) &&
6569 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6570 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6571 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6572 cur = cur->c2;
6573 }
6574 }
6575 NEXT;
6576 } else if (RAW == '+') {
6577 if (ret != NULL) {
6578 int found = 0;
6579
6580 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6581 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6582 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6583 else
6584 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6585 /*
6586 * Some normalization:
6587 * (a | b*)+ == (a | b)*
6588 * (a | b?)+ == (a | b)*
6589 */
6590 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6591 if ((cur->c1 != NULL) &&
6592 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6593 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6594 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6595 found = 1;
6596 }
6597 if ((cur->c2 != NULL) &&
6598 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6599 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6600 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6601 found = 1;
6602 }
6603 cur = cur->c2;
6604 }
6605 if (found)
6606 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6607 }
6608 NEXT;
6609 }
6610 return(ret);
6611 }
6612
6613 /**
6614 * xmlParseElementChildrenContentDecl:
6615 * @ctxt: an XML parser context
6616 * @inputchk: the input used for the current entity, needed for boundary checks
6617 *
6618 * parse the declaration for a Mixed Element content
6619 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6620 *
6621 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6622 *
6623 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6624 *
6625 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6626 *
6627 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6628 *
6629 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6630 * TODO Parameter-entity replacement text must be properly nested
6631 * with parenthesized groups. That is to say, if either of the
6632 * opening or closing parentheses in a choice, seq, or Mixed
6633 * construct is contained in the replacement text for a parameter
6634 * entity, both must be contained in the same replacement text. For
6635 * interoperability, if a parameter-entity reference appears in a
6636 * choice, seq, or Mixed construct, its replacement text should not
6637 * be empty, and neither the first nor last non-blank character of
6638 * the replacement text should be a connector (| or ,).
6639 *
6640 * Returns the tree of xmlElementContentPtr describing the element
6641 * hierarchy.
6642 */
6643 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6644 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6645 /* stub left for API/ABI compat */
6646 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6647 }
6648
6649 /**
6650 * xmlParseElementContentDecl:
6651 * @ctxt: an XML parser context
6652 * @name: the name of the element being defined.
6653 * @result: the Element Content pointer will be stored here if any
6654 *
6655 * parse the declaration for an Element content either Mixed or Children,
6656 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6657 *
6658 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6659 *
6660 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6661 */
6662
6663 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6664 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6665 xmlElementContentPtr *result) {
6666
6667 xmlElementContentPtr tree = NULL;
6668 int inputid = ctxt->input->id;
6669 int res;
6670
6671 *result = NULL;
6672
6673 if (RAW != '(') {
6674 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6675 "xmlParseElementContentDecl : %s '(' expected\n", name);
6676 return(-1);
6677 }
6678 NEXT;
6679 GROW;
6680 if (ctxt->instate == XML_PARSER_EOF)
6681 return(-1);
6682 SKIP_BLANKS;
6683 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6684 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6685 res = XML_ELEMENT_TYPE_MIXED;
6686 } else {
6687 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6688 res = XML_ELEMENT_TYPE_ELEMENT;
6689 }
6690 SKIP_BLANKS;
6691 *result = tree;
6692 return(res);
6693 }
6694
6695 /**
6696 * xmlParseElementDecl:
6697 * @ctxt: an XML parser context
6698 *
6699 * parse an Element declaration.
6700 *
6701 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6702 *
6703 * [ VC: Unique Element Type Declaration ]
6704 * No element type may be declared more than once
6705 *
6706 * Returns the type of the element, or -1 in case of error
6707 */
6708 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6709 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6710 const xmlChar *name;
6711 int ret = -1;
6712 xmlElementContentPtr content = NULL;
6713
6714 /* GROW; done in the caller */
6715 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6716 xmlParserInputPtr input = ctxt->input;
6717
6718 SKIP(9);
6719 if (!IS_BLANK_CH(CUR)) {
6720 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6721 "Space required after 'ELEMENT'\n");
6722 return(-1);
6723 }
6724 SKIP_BLANKS;
6725 name = xmlParseName(ctxt);
6726 if (name == NULL) {
6727 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6728 "xmlParseElementDecl: no name for Element\n");
6729 return(-1);
6730 }
6731 while ((RAW == 0) && (ctxt->inputNr > 1))
6732 xmlPopInput(ctxt);
6733 if (!IS_BLANK_CH(CUR)) {
6734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6735 "Space required after the element name\n");
6736 }
6737 SKIP_BLANKS;
6738 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6739 SKIP(5);
6740 /*
6741 * Element must always be empty.
6742 */
6743 ret = XML_ELEMENT_TYPE_EMPTY;
6744 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6745 (NXT(2) == 'Y')) {
6746 SKIP(3);
6747 /*
6748 * Element is a generic container.
6749 */
6750 ret = XML_ELEMENT_TYPE_ANY;
6751 } else if (RAW == '(') {
6752 ret = xmlParseElementContentDecl(ctxt, name, &content);
6753 } else {
6754 /*
6755 * [ WFC: PEs in Internal Subset ] error handling.
6756 */
6757 if ((RAW == '%') && (ctxt->external == 0) &&
6758 (ctxt->inputNr == 1)) {
6759 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6760 "PEReference: forbidden within markup decl in internal subset\n");
6761 } else {
6762 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6763 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6764 }
6765 return(-1);
6766 }
6767
6768 SKIP_BLANKS;
6769 /*
6770 * Pop-up of finished entities.
6771 */
6772 while ((RAW == 0) && (ctxt->inputNr > 1))
6773 xmlPopInput(ctxt);
6774 SKIP_BLANKS;
6775
6776 if (RAW != '>') {
6777 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6778 if (content != NULL) {
6779 xmlFreeDocElementContent(ctxt->myDoc, content);
6780 }
6781 } else {
6782 if (input != ctxt->input) {
6783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6784 "Element declaration doesn't start and stop in the same entity\n");
6785 }
6786
6787 NEXT;
6788 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6789 (ctxt->sax->elementDecl != NULL)) {
6790 if (content != NULL)
6791 content->parent = NULL;
6792 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6793 content);
6794 if ((content != NULL) && (content->parent == NULL)) {
6795 /*
6796 * this is a trick: if xmlAddElementDecl is called,
6797 * instead of copying the full tree it is plugged directly
6798 * if called from the parser. Avoid duplicating the
6799 * interfaces or change the API/ABI
6800 */
6801 xmlFreeDocElementContent(ctxt->myDoc, content);
6802 }
6803 } else if (content != NULL) {
6804 xmlFreeDocElementContent(ctxt->myDoc, content);
6805 }
6806 }
6807 }
6808 return(ret);
6809 }
6810
6811 /**
6812 * xmlParseConditionalSections
6813 * @ctxt: an XML parser context
6814 *
6815 * [61] conditionalSect ::= includeSect | ignoreSect
6816 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6817 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6818 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6819 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6820 */
6821
6822 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6823 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6824 int id = ctxt->input->id;
6825
6826 SKIP(3);
6827 SKIP_BLANKS;
6828 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6829 SKIP(7);
6830 SKIP_BLANKS;
6831 if (RAW != '[') {
6832 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6833 xmlHaltParser(ctxt);
6834 return;
6835 } else {
6836 if (ctxt->input->id != id) {
6837 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6838 "All markup of the conditional section is not in the same entity\n",
6839 NULL, NULL);
6840 }
6841 NEXT;
6842 }
6843 if (xmlParserDebugEntities) {
6844 if ((ctxt->input != NULL) && (ctxt->input->filename))
6845 xmlGenericError(xmlGenericErrorContext,
6846 "%s(%d): ", ctxt->input->filename,
6847 ctxt->input->line);
6848 xmlGenericError(xmlGenericErrorContext,
6849 "Entering INCLUDE Conditional Section\n");
6850 }
6851
6852 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6853 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6854 const xmlChar *check = CUR_PTR;
6855 unsigned int cons = ctxt->input->consumed;
6856
6857 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6858 xmlParseConditionalSections(ctxt);
6859 } else if (IS_BLANK_CH(CUR)) {
6860 NEXT;
6861 } else if (RAW == '%') {
6862 xmlParsePEReference(ctxt);
6863 } else
6864 xmlParseMarkupDecl(ctxt);
6865
6866 /*
6867 * Pop-up of finished entities.
6868 */
6869 while ((RAW == 0) && (ctxt->inputNr > 1))
6870 xmlPopInput(ctxt);
6871
6872 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6873 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6874 xmlHaltParser(ctxt);
6875 break;
6876 }
6877 }
6878 if (xmlParserDebugEntities) {
6879 if ((ctxt->input != NULL) && (ctxt->input->filename))
6880 xmlGenericError(xmlGenericErrorContext,
6881 "%s(%d): ", ctxt->input->filename,
6882 ctxt->input->line);
6883 xmlGenericError(xmlGenericErrorContext,
6884 "Leaving INCLUDE Conditional Section\n");
6885 }
6886
6887 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6888 int state;
6889 xmlParserInputState instate;
6890 int depth = 0;
6891
6892 SKIP(6);
6893 SKIP_BLANKS;
6894 if (RAW != '[') {
6895 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6896 xmlHaltParser(ctxt);
6897 return;
6898 } else {
6899 if (ctxt->input->id != id) {
6900 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6901 "All markup of the conditional section is not in the same entity\n",
6902 NULL, NULL);
6903 }
6904 NEXT;
6905 }
6906 if (xmlParserDebugEntities) {
6907 if ((ctxt->input != NULL) && (ctxt->input->filename))
6908 xmlGenericError(xmlGenericErrorContext,
6909 "%s(%d): ", ctxt->input->filename,
6910 ctxt->input->line);
6911 xmlGenericError(xmlGenericErrorContext,
6912 "Entering IGNORE Conditional Section\n");
6913 }
6914
6915 /*
6916 * Parse up to the end of the conditional section
6917 * But disable SAX event generating DTD building in the meantime
6918 */
6919 state = ctxt->disableSAX;
6920 instate = ctxt->instate;
6921 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6922 ctxt->instate = XML_PARSER_IGNORE;
6923
6924 while (((depth >= 0) && (RAW != 0)) &&
6925 (ctxt->instate != XML_PARSER_EOF)) {
6926 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6927 depth++;
6928 SKIP(3);
6929 continue;
6930 }
6931 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6932 if (--depth >= 0) SKIP(3);
6933 continue;
6934 }
6935 NEXT;
6936 continue;
6937 }
6938
6939 ctxt->disableSAX = state;
6940 ctxt->instate = instate;
6941
6942 if (xmlParserDebugEntities) {
6943 if ((ctxt->input != NULL) && (ctxt->input->filename))
6944 xmlGenericError(xmlGenericErrorContext,
6945 "%s(%d): ", ctxt->input->filename,
6946 ctxt->input->line);
6947 xmlGenericError(xmlGenericErrorContext,
6948 "Leaving IGNORE Conditional Section\n");
6949 }
6950
6951 } else {
6952 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6953 xmlHaltParser(ctxt);
6954 return;
6955 }
6956
6957 if (RAW == 0)
6958 SHRINK;
6959
6960 if (RAW == 0) {
6961 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6962 } else {
6963 if (ctxt->input->id != id) {
6964 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6965 "All markup of the conditional section is not in the same entity\n",
6966 NULL, NULL);
6967 }
6968 if ((ctxt-> instate != XML_PARSER_EOF) &&
6969 ((ctxt->input->cur + 3) <= ctxt->input->end))
6970 SKIP(3);
6971 }
6972 }
6973
6974 /**
6975 * xmlParseMarkupDecl:
6976 * @ctxt: an XML parser context
6977 *
6978 * parse Markup declarations
6979 *
6980 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6981 * NotationDecl | PI | Comment
6982 *
6983 * [ VC: Proper Declaration/PE Nesting ]
6984 * Parameter-entity replacement text must be properly nested with
6985 * markup declarations. That is to say, if either the first character
6986 * or the last character of a markup declaration (markupdecl above) is
6987 * contained in the replacement text for a parameter-entity reference,
6988 * both must be contained in the same replacement text.
6989 *
6990 * [ WFC: PEs in Internal Subset ]
6991 * In the internal DTD subset, parameter-entity references can occur
6992 * only where markup declarations can occur, not within markup declarations.
6993 * (This does not apply to references that occur in external parameter
6994 * entities or to the external subset.)
6995 */
6996 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6997 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6998 GROW;
6999 if (CUR == '<') {
7000 if (NXT(1) == '!') {
7001 switch (NXT(2)) {
7002 case 'E':
7003 if (NXT(3) == 'L')
7004 xmlParseElementDecl(ctxt);
7005 else if (NXT(3) == 'N')
7006 xmlParseEntityDecl(ctxt);
7007 break;
7008 case 'A':
7009 xmlParseAttributeListDecl(ctxt);
7010 break;
7011 case 'N':
7012 xmlParseNotationDecl(ctxt);
7013 break;
7014 case '-':
7015 xmlParseComment(ctxt);
7016 break;
7017 default:
7018 /* there is an error but it will be detected later */
7019 break;
7020 }
7021 } else if (NXT(1) == '?') {
7022 xmlParsePI(ctxt);
7023 }
7024 }
7025
7026 /*
7027 * detect requirement to exit there and act accordingly
7028 * and avoid having instate overriden later on
7029 */
7030 if (ctxt->instate == XML_PARSER_EOF)
7031 return;
7032
7033 /*
7034 * This is only for internal subset. On external entities,
7035 * the replacement is done before parsing stage
7036 */
7037 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7038 xmlParsePEReference(ctxt);
7039
7040 /*
7041 * Conditional sections are allowed from entities included
7042 * by PE References in the internal subset.
7043 */
7044 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7045 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7046 xmlParseConditionalSections(ctxt);
7047 }
7048 }
7049
7050 ctxt->instate = XML_PARSER_DTD;
7051 }
7052
7053 /**
7054 * xmlParseTextDecl:
7055 * @ctxt: an XML parser context
7056 *
7057 * parse an XML declaration header for external entities
7058 *
7059 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7060 */
7061
7062 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7063 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7064 xmlChar *version;
7065 const xmlChar *encoding;
7066
7067 /*
7068 * We know that '<?xml' is here.
7069 */
7070 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7071 SKIP(5);
7072 } else {
7073 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7074 return;
7075 }
7076
7077 if (!IS_BLANK_CH(CUR)) {
7078 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7079 "Space needed after '<?xml'\n");
7080 }
7081 SKIP_BLANKS;
7082
7083 /*
7084 * We may have the VersionInfo here.
7085 */
7086 version = xmlParseVersionInfo(ctxt);
7087 if (version == NULL)
7088 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7089 else {
7090 if (!IS_BLANK_CH(CUR)) {
7091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7092 "Space needed here\n");
7093 }
7094 }
7095 ctxt->input->version = version;
7096
7097 /*
7098 * We must have the encoding declaration
7099 */
7100 encoding = xmlParseEncodingDecl(ctxt);
7101 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7102 /*
7103 * The XML REC instructs us to stop parsing right here
7104 */
7105 return;
7106 }
7107 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7108 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7109 "Missing encoding in text declaration\n");
7110 }
7111
7112 SKIP_BLANKS;
7113 if ((RAW == '?') && (NXT(1) == '>')) {
7114 SKIP(2);
7115 } else if (RAW == '>') {
7116 /* Deprecated old WD ... */
7117 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7118 NEXT;
7119 } else {
7120 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7121 MOVETO_ENDTAG(CUR_PTR);
7122 NEXT;
7123 }
7124 }
7125
7126 /**
7127 * xmlParseExternalSubset:
7128 * @ctxt: an XML parser context
7129 * @ExternalID: the external identifier
7130 * @SystemID: the system identifier (or URL)
7131 *
7132 * parse Markup declarations from an external subset
7133 *
7134 * [30] extSubset ::= textDecl? extSubsetDecl
7135 *
7136 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7137 */
7138 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7139 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7140 const xmlChar *SystemID) {
7141 xmlDetectSAX2(ctxt);
7142 GROW;
7143
7144 if ((ctxt->encoding == NULL) &&
7145 (ctxt->input->end - ctxt->input->cur >= 4)) {
7146 xmlChar start[4];
7147 xmlCharEncoding enc;
7148
7149 start[0] = RAW;
7150 start[1] = NXT(1);
7151 start[2] = NXT(2);
7152 start[3] = NXT(3);
7153 enc = xmlDetectCharEncoding(start, 4);
7154 if (enc != XML_CHAR_ENCODING_NONE)
7155 xmlSwitchEncoding(ctxt, enc);
7156 }
7157
7158 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7159 xmlParseTextDecl(ctxt);
7160 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7161 /*
7162 * The XML REC instructs us to stop parsing right here
7163 */
7164 xmlHaltParser(ctxt);
7165 return;
7166 }
7167 }
7168 if (ctxt->myDoc == NULL) {
7169 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7170 if (ctxt->myDoc == NULL) {
7171 xmlErrMemory(ctxt, "New Doc failed");
7172 return;
7173 }
7174 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7175 }
7176 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7177 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7178
7179 ctxt->instate = XML_PARSER_DTD;
7180 ctxt->external = 1;
7181 while (((RAW == '<') && (NXT(1) == '?')) ||
7182 ((RAW == '<') && (NXT(1) == '!')) ||
7183 (RAW == '%') || IS_BLANK_CH(CUR)) {
7184 const xmlChar *check = CUR_PTR;
7185 unsigned int cons = ctxt->input->consumed;
7186
7187 GROW;
7188 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7189 xmlParseConditionalSections(ctxt);
7190 } else if (IS_BLANK_CH(CUR)) {
7191 NEXT;
7192 } else if (RAW == '%') {
7193 xmlParsePEReference(ctxt);
7194 } else
7195 xmlParseMarkupDecl(ctxt);
7196
7197 /*
7198 * Pop-up of finished entities.
7199 */
7200 while ((RAW == 0) && (ctxt->inputNr > 1))
7201 xmlPopInput(ctxt);
7202
7203 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7204 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7205 break;
7206 }
7207 }
7208
7209 if (RAW != 0) {
7210 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7211 }
7212
7213 }
7214
7215 /**
7216 * xmlParseReference:
7217 * @ctxt: an XML parser context
7218 *
7219 * parse and handle entity references in content, depending on the SAX
7220 * interface, this may end-up in a call to character() if this is a
7221 * CharRef, a predefined entity, if there is no reference() callback.
7222 * or if the parser was asked to switch to that mode.
7223 *
7224 * [67] Reference ::= EntityRef | CharRef
7225 */
7226 void
xmlParseReference(xmlParserCtxtPtr ctxt)7227 xmlParseReference(xmlParserCtxtPtr ctxt) {
7228 xmlEntityPtr ent;
7229 xmlChar *val;
7230 int was_checked;
7231 xmlNodePtr list = NULL;
7232 xmlParserErrors ret = XML_ERR_OK;
7233
7234
7235 if (RAW != '&')
7236 return;
7237
7238 /*
7239 * Simple case of a CharRef
7240 */
7241 if (NXT(1) == '#') {
7242 int i = 0;
7243 xmlChar out[10];
7244 int hex = NXT(2);
7245 int value = xmlParseCharRef(ctxt);
7246
7247 if (value == 0)
7248 return;
7249 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7250 /*
7251 * So we are using non-UTF-8 buffers
7252 * Check that the char fit on 8bits, if not
7253 * generate a CharRef.
7254 */
7255 if (value <= 0xFF) {
7256 out[0] = value;
7257 out[1] = 0;
7258 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7259 (!ctxt->disableSAX))
7260 ctxt->sax->characters(ctxt->userData, out, 1);
7261 } else {
7262 if ((hex == 'x') || (hex == 'X'))
7263 snprintf((char *)out, sizeof(out), "#x%X", value);
7264 else
7265 snprintf((char *)out, sizeof(out), "#%d", value);
7266 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7267 (!ctxt->disableSAX))
7268 ctxt->sax->reference(ctxt->userData, out);
7269 }
7270 } else {
7271 /*
7272 * Just encode the value in UTF-8
7273 */
7274 COPY_BUF(0 ,out, i, value);
7275 out[i] = 0;
7276 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7277 (!ctxt->disableSAX))
7278 ctxt->sax->characters(ctxt->userData, out, i);
7279 }
7280 return;
7281 }
7282
7283 /*
7284 * We are seeing an entity reference
7285 */
7286 ent = xmlParseEntityRef(ctxt);
7287 if (ent == NULL) return;
7288 if (!ctxt->wellFormed)
7289 return;
7290 was_checked = ent->checked;
7291
7292 /* special case of predefined entities */
7293 if ((ent->name == NULL) ||
7294 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7295 val = ent->content;
7296 if (val == NULL) return;
7297 /*
7298 * inline the entity.
7299 */
7300 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7301 (!ctxt->disableSAX))
7302 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7303 return;
7304 }
7305
7306 /*
7307 * The first reference to the entity trigger a parsing phase
7308 * where the ent->children is filled with the result from
7309 * the parsing.
7310 * Note: external parsed entities will not be loaded, it is not
7311 * required for a non-validating parser, unless the parsing option
7312 * of validating, or substituting entities were given. Doing so is
7313 * far more secure as the parser will only process data coming from
7314 * the document entity by default.
7315 */
7316 if (((ent->checked == 0) ||
7317 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7318 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7319 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7320 unsigned long oldnbent = ctxt->nbentities;
7321
7322 /*
7323 * This is a bit hackish but this seems the best
7324 * way to make sure both SAX and DOM entity support
7325 * behaves okay.
7326 */
7327 void *user_data;
7328 if (ctxt->userData == ctxt)
7329 user_data = NULL;
7330 else
7331 user_data = ctxt->userData;
7332
7333 /*
7334 * Check that this entity is well formed
7335 * 4.3.2: An internal general parsed entity is well-formed
7336 * if its replacement text matches the production labeled
7337 * content.
7338 */
7339 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7340 ctxt->depth++;
7341 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7342 user_data, &list);
7343 ctxt->depth--;
7344
7345 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7346 ctxt->depth++;
7347 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7348 user_data, ctxt->depth, ent->URI,
7349 ent->ExternalID, &list);
7350 ctxt->depth--;
7351 } else {
7352 ret = XML_ERR_ENTITY_PE_INTERNAL;
7353 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7354 "invalid entity type found\n", NULL);
7355 }
7356
7357 /*
7358 * Store the number of entities needing parsing for this entity
7359 * content and do checkings
7360 */
7361 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7362 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7363 ent->checked |= 1;
7364 if (ret == XML_ERR_ENTITY_LOOP) {
7365 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7366 xmlFreeNodeList(list);
7367 return;
7368 }
7369 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7370 xmlFreeNodeList(list);
7371 return;
7372 }
7373
7374 if ((ret == XML_ERR_OK) && (list != NULL)) {
7375 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7376 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7377 (ent->children == NULL)) {
7378 ent->children = list;
7379 if (ctxt->replaceEntities) {
7380 /*
7381 * Prune it directly in the generated document
7382 * except for single text nodes.
7383 */
7384 if (((list->type == XML_TEXT_NODE) &&
7385 (list->next == NULL)) ||
7386 (ctxt->parseMode == XML_PARSE_READER)) {
7387 list->parent = (xmlNodePtr) ent;
7388 list = NULL;
7389 ent->owner = 1;
7390 } else {
7391 ent->owner = 0;
7392 while (list != NULL) {
7393 list->parent = (xmlNodePtr) ctxt->node;
7394 list->doc = ctxt->myDoc;
7395 if (list->next == NULL)
7396 ent->last = list;
7397 list = list->next;
7398 }
7399 list = ent->children;
7400 #ifdef LIBXML_LEGACY_ENABLED
7401 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7402 xmlAddEntityReference(ent, list, NULL);
7403 #endif /* LIBXML_LEGACY_ENABLED */
7404 }
7405 } else {
7406 ent->owner = 1;
7407 while (list != NULL) {
7408 list->parent = (xmlNodePtr) ent;
7409 xmlSetTreeDoc(list, ent->doc);
7410 if (list->next == NULL)
7411 ent->last = list;
7412 list = list->next;
7413 }
7414 }
7415 } else {
7416 xmlFreeNodeList(list);
7417 list = NULL;
7418 }
7419 } else if ((ret != XML_ERR_OK) &&
7420 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7421 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7422 "Entity '%s' failed to parse\n", ent->name);
7423 xmlParserEntityCheck(ctxt, 0, ent, 0);
7424 } else if (list != NULL) {
7425 xmlFreeNodeList(list);
7426 list = NULL;
7427 }
7428 if (ent->checked == 0)
7429 ent->checked = 2;
7430 } else if (ent->checked != 1) {
7431 ctxt->nbentities += ent->checked / 2;
7432 }
7433
7434 /*
7435 * Now that the entity content has been gathered
7436 * provide it to the application, this can take different forms based
7437 * on the parsing modes.
7438 */
7439 if (ent->children == NULL) {
7440 /*
7441 * Probably running in SAX mode and the callbacks don't
7442 * build the entity content. So unless we already went
7443 * though parsing for first checking go though the entity
7444 * content to generate callbacks associated to the entity
7445 */
7446 if (was_checked != 0) {
7447 void *user_data;
7448 /*
7449 * This is a bit hackish but this seems the best
7450 * way to make sure both SAX and DOM entity support
7451 * behaves okay.
7452 */
7453 if (ctxt->userData == ctxt)
7454 user_data = NULL;
7455 else
7456 user_data = ctxt->userData;
7457
7458 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7459 ctxt->depth++;
7460 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7461 ent->content, user_data, NULL);
7462 ctxt->depth--;
7463 } else if (ent->etype ==
7464 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7465 ctxt->depth++;
7466 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7467 ctxt->sax, user_data, ctxt->depth,
7468 ent->URI, ent->ExternalID, NULL);
7469 ctxt->depth--;
7470 } else {
7471 ret = XML_ERR_ENTITY_PE_INTERNAL;
7472 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7473 "invalid entity type found\n", NULL);
7474 }
7475 if (ret == XML_ERR_ENTITY_LOOP) {
7476 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7477 return;
7478 }
7479 }
7480 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7481 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7482 /*
7483 * Entity reference callback comes second, it's somewhat
7484 * superfluous but a compatibility to historical behaviour
7485 */
7486 ctxt->sax->reference(ctxt->userData, ent->name);
7487 }
7488 return;
7489 }
7490
7491 /*
7492 * If we didn't get any children for the entity being built
7493 */
7494 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7495 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7496 /*
7497 * Create a node.
7498 */
7499 ctxt->sax->reference(ctxt->userData, ent->name);
7500 return;
7501 }
7502
7503 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7504 /*
7505 * There is a problem on the handling of _private for entities
7506 * (bug 155816): Should we copy the content of the field from
7507 * the entity (possibly overwriting some value set by the user
7508 * when a copy is created), should we leave it alone, or should
7509 * we try to take care of different situations? The problem
7510 * is exacerbated by the usage of this field by the xmlReader.
7511 * To fix this bug, we look at _private on the created node
7512 * and, if it's NULL, we copy in whatever was in the entity.
7513 * If it's not NULL we leave it alone. This is somewhat of a
7514 * hack - maybe we should have further tests to determine
7515 * what to do.
7516 */
7517 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7518 /*
7519 * Seems we are generating the DOM content, do
7520 * a simple tree copy for all references except the first
7521 * In the first occurrence list contains the replacement.
7522 */
7523 if (((list == NULL) && (ent->owner == 0)) ||
7524 (ctxt->parseMode == XML_PARSE_READER)) {
7525 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7526
7527 /*
7528 * We are copying here, make sure there is no abuse
7529 */
7530 ctxt->sizeentcopy += ent->length + 5;
7531 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7532 return;
7533
7534 /*
7535 * when operating on a reader, the entities definitions
7536 * are always owning the entities subtree.
7537 if (ctxt->parseMode == XML_PARSE_READER)
7538 ent->owner = 1;
7539 */
7540
7541 cur = ent->children;
7542 while (cur != NULL) {
7543 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7544 if (nw != NULL) {
7545 if (nw->_private == NULL)
7546 nw->_private = cur->_private;
7547 if (firstChild == NULL){
7548 firstChild = nw;
7549 }
7550 nw = xmlAddChild(ctxt->node, nw);
7551 }
7552 if (cur == ent->last) {
7553 /*
7554 * needed to detect some strange empty
7555 * node cases in the reader tests
7556 */
7557 if ((ctxt->parseMode == XML_PARSE_READER) &&
7558 (nw != NULL) &&
7559 (nw->type == XML_ELEMENT_NODE) &&
7560 (nw->children == NULL))
7561 nw->extra = 1;
7562
7563 break;
7564 }
7565 cur = cur->next;
7566 }
7567 #ifdef LIBXML_LEGACY_ENABLED
7568 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7569 xmlAddEntityReference(ent, firstChild, nw);
7570 #endif /* LIBXML_LEGACY_ENABLED */
7571 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7572 xmlNodePtr nw = NULL, cur, next, last,
7573 firstChild = NULL;
7574
7575 /*
7576 * We are copying here, make sure there is no abuse
7577 */
7578 ctxt->sizeentcopy += ent->length + 5;
7579 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7580 return;
7581
7582 /*
7583 * Copy the entity child list and make it the new
7584 * entity child list. The goal is to make sure any
7585 * ID or REF referenced will be the one from the
7586 * document content and not the entity copy.
7587 */
7588 cur = ent->children;
7589 ent->children = NULL;
7590 last = ent->last;
7591 ent->last = NULL;
7592 while (cur != NULL) {
7593 next = cur->next;
7594 cur->next = NULL;
7595 cur->parent = NULL;
7596 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7597 if (nw != NULL) {
7598 if (nw->_private == NULL)
7599 nw->_private = cur->_private;
7600 if (firstChild == NULL){
7601 firstChild = cur;
7602 }
7603 xmlAddChild((xmlNodePtr) ent, nw);
7604 xmlAddChild(ctxt->node, cur);
7605 }
7606 if (cur == last)
7607 break;
7608 cur = next;
7609 }
7610 if (ent->owner == 0)
7611 ent->owner = 1;
7612 #ifdef LIBXML_LEGACY_ENABLED
7613 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7614 xmlAddEntityReference(ent, firstChild, nw);
7615 #endif /* LIBXML_LEGACY_ENABLED */
7616 } else {
7617 const xmlChar *nbktext;
7618
7619 /*
7620 * the name change is to avoid coalescing of the
7621 * node with a possible previous text one which
7622 * would make ent->children a dangling pointer
7623 */
7624 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7625 -1);
7626 if (ent->children->type == XML_TEXT_NODE)
7627 ent->children->name = nbktext;
7628 if ((ent->last != ent->children) &&
7629 (ent->last->type == XML_TEXT_NODE))
7630 ent->last->name = nbktext;
7631 xmlAddChildList(ctxt->node, ent->children);
7632 }
7633
7634 /*
7635 * This is to avoid a nasty side effect, see
7636 * characters() in SAX.c
7637 */
7638 ctxt->nodemem = 0;
7639 ctxt->nodelen = 0;
7640 return;
7641 }
7642 }
7643 }
7644
7645 /**
7646 * xmlParseEntityRef:
7647 * @ctxt: an XML parser context
7648 *
7649 * parse ENTITY references declarations
7650 *
7651 * [68] EntityRef ::= '&' Name ';'
7652 *
7653 * [ WFC: Entity Declared ]
7654 * In a document without any DTD, a document with only an internal DTD
7655 * subset which contains no parameter entity references, or a document
7656 * with "standalone='yes'", the Name given in the entity reference
7657 * must match that in an entity declaration, except that well-formed
7658 * documents need not declare any of the following entities: amp, lt,
7659 * gt, apos, quot. The declaration of a parameter entity must precede
7660 * any reference to it. Similarly, the declaration of a general entity
7661 * must precede any reference to it which appears in a default value in an
7662 * attribute-list declaration. Note that if entities are declared in the
7663 * external subset or in external parameter entities, a non-validating
7664 * processor is not obligated to read and process their declarations;
7665 * for such documents, the rule that an entity must be declared is a
7666 * well-formedness constraint only if standalone='yes'.
7667 *
7668 * [ WFC: Parsed Entity ]
7669 * An entity reference must not contain the name of an unparsed entity
7670 *
7671 * Returns the xmlEntityPtr if found, or NULL otherwise.
7672 */
7673 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7674 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7675 const xmlChar *name;
7676 xmlEntityPtr ent = NULL;
7677
7678 GROW;
7679 if (ctxt->instate == XML_PARSER_EOF)
7680 return(NULL);
7681
7682 if (RAW != '&')
7683 return(NULL);
7684 NEXT;
7685 name = xmlParseName(ctxt);
7686 if (name == NULL) {
7687 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7688 "xmlParseEntityRef: no name\n");
7689 return(NULL);
7690 }
7691 if (RAW != ';') {
7692 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7693 return(NULL);
7694 }
7695 NEXT;
7696
7697 /*
7698 * Predefined entities override any extra definition
7699 */
7700 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7701 ent = xmlGetPredefinedEntity(name);
7702 if (ent != NULL)
7703 return(ent);
7704 }
7705
7706 /*
7707 * Increase the number of entity references parsed
7708 */
7709 ctxt->nbentities++;
7710
7711 /*
7712 * Ask first SAX for entity resolution, otherwise try the
7713 * entities which may have stored in the parser context.
7714 */
7715 if (ctxt->sax != NULL) {
7716 if (ctxt->sax->getEntity != NULL)
7717 ent = ctxt->sax->getEntity(ctxt->userData, name);
7718 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7719 (ctxt->options & XML_PARSE_OLDSAX))
7720 ent = xmlGetPredefinedEntity(name);
7721 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7722 (ctxt->userData==ctxt)) {
7723 ent = xmlSAX2GetEntity(ctxt, name);
7724 }
7725 }
7726 if (ctxt->instate == XML_PARSER_EOF)
7727 return(NULL);
7728 /*
7729 * [ WFC: Entity Declared ]
7730 * In a document without any DTD, a document with only an
7731 * internal DTD subset which contains no parameter entity
7732 * references, or a document with "standalone='yes'", the
7733 * Name given in the entity reference must match that in an
7734 * entity declaration, except that well-formed documents
7735 * need not declare any of the following entities: amp, lt,
7736 * gt, apos, quot.
7737 * The declaration of a parameter entity must precede any
7738 * reference to it.
7739 * Similarly, the declaration of a general entity must
7740 * precede any reference to it which appears in a default
7741 * value in an attribute-list declaration. Note that if
7742 * entities are declared in the external subset or in
7743 * external parameter entities, a non-validating processor
7744 * is not obligated to read and process their declarations;
7745 * for such documents, the rule that an entity must be
7746 * declared is a well-formedness constraint only if
7747 * standalone='yes'.
7748 */
7749 if (ent == NULL) {
7750 if ((ctxt->standalone == 1) ||
7751 ((ctxt->hasExternalSubset == 0) &&
7752 (ctxt->hasPErefs == 0))) {
7753 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7754 "Entity '%s' not defined\n", name);
7755 } else {
7756 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7757 "Entity '%s' not defined\n", name);
7758 if ((ctxt->inSubset == 0) &&
7759 (ctxt->sax != NULL) &&
7760 (ctxt->sax->reference != NULL)) {
7761 ctxt->sax->reference(ctxt->userData, name);
7762 }
7763 }
7764 xmlParserEntityCheck(ctxt, 0, ent, 0);
7765 ctxt->valid = 0;
7766 }
7767
7768 /*
7769 * [ WFC: Parsed Entity ]
7770 * An entity reference must not contain the name of an
7771 * unparsed entity
7772 */
7773 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7774 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7775 "Entity reference to unparsed entity %s\n", name);
7776 }
7777
7778 /*
7779 * [ WFC: No External Entity References ]
7780 * Attribute values cannot contain direct or indirect
7781 * entity references to external entities.
7782 */
7783 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7784 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7785 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7786 "Attribute references external entity '%s'\n", name);
7787 }
7788 /*
7789 * [ WFC: No < in Attribute Values ]
7790 * The replacement text of any entity referred to directly or
7791 * indirectly in an attribute value (other than "<") must
7792 * not contain a <.
7793 */
7794 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7795 (ent != NULL) &&
7796 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7797 if (((ent->checked & 1) || (ent->checked == 0)) &&
7798 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7799 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7800 "'<' in entity '%s' is not allowed in attributes values\n", name);
7801 }
7802 }
7803
7804 /*
7805 * Internal check, no parameter entities here ...
7806 */
7807 else {
7808 switch (ent->etype) {
7809 case XML_INTERNAL_PARAMETER_ENTITY:
7810 case XML_EXTERNAL_PARAMETER_ENTITY:
7811 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7812 "Attempt to reference the parameter entity '%s'\n",
7813 name);
7814 break;
7815 default:
7816 break;
7817 }
7818 }
7819
7820 /*
7821 * [ WFC: No Recursion ]
7822 * A parsed entity must not contain a recursive reference
7823 * to itself, either directly or indirectly.
7824 * Done somewhere else
7825 */
7826 return(ent);
7827 }
7828
7829 /**
7830 * xmlParseStringEntityRef:
7831 * @ctxt: an XML parser context
7832 * @str: a pointer to an index in the string
7833 *
7834 * parse ENTITY references declarations, but this version parses it from
7835 * a string value.
7836 *
7837 * [68] EntityRef ::= '&' Name ';'
7838 *
7839 * [ WFC: Entity Declared ]
7840 * In a document without any DTD, a document with only an internal DTD
7841 * subset which contains no parameter entity references, or a document
7842 * with "standalone='yes'", the Name given in the entity reference
7843 * must match that in an entity declaration, except that well-formed
7844 * documents need not declare any of the following entities: amp, lt,
7845 * gt, apos, quot. The declaration of a parameter entity must precede
7846 * any reference to it. Similarly, the declaration of a general entity
7847 * must precede any reference to it which appears in a default value in an
7848 * attribute-list declaration. Note that if entities are declared in the
7849 * external subset or in external parameter entities, a non-validating
7850 * processor is not obligated to read and process their declarations;
7851 * for such documents, the rule that an entity must be declared is a
7852 * well-formedness constraint only if standalone='yes'.
7853 *
7854 * [ WFC: Parsed Entity ]
7855 * An entity reference must not contain the name of an unparsed entity
7856 *
7857 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7858 * is updated to the current location in the string.
7859 */
7860 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7861 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7862 xmlChar *name;
7863 const xmlChar *ptr;
7864 xmlChar cur;
7865 xmlEntityPtr ent = NULL;
7866
7867 if ((str == NULL) || (*str == NULL))
7868 return(NULL);
7869 ptr = *str;
7870 cur = *ptr;
7871 if (cur != '&')
7872 return(NULL);
7873
7874 ptr++;
7875 name = xmlParseStringName(ctxt, &ptr);
7876 if (name == NULL) {
7877 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878 "xmlParseStringEntityRef: no name\n");
7879 *str = ptr;
7880 return(NULL);
7881 }
7882 if (*ptr != ';') {
7883 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7884 xmlFree(name);
7885 *str = ptr;
7886 return(NULL);
7887 }
7888 ptr++;
7889
7890
7891 /*
7892 * Predefined entities override any extra definition
7893 */
7894 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7895 ent = xmlGetPredefinedEntity(name);
7896 if (ent != NULL) {
7897 xmlFree(name);
7898 *str = ptr;
7899 return(ent);
7900 }
7901 }
7902
7903 /*
7904 * Increate the number of entity references parsed
7905 */
7906 ctxt->nbentities++;
7907
7908 /*
7909 * Ask first SAX for entity resolution, otherwise try the
7910 * entities which may have stored in the parser context.
7911 */
7912 if (ctxt->sax != NULL) {
7913 if (ctxt->sax->getEntity != NULL)
7914 ent = ctxt->sax->getEntity(ctxt->userData, name);
7915 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7916 ent = xmlGetPredefinedEntity(name);
7917 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7918 ent = xmlSAX2GetEntity(ctxt, name);
7919 }
7920 }
7921 if (ctxt->instate == XML_PARSER_EOF) {
7922 xmlFree(name);
7923 return(NULL);
7924 }
7925
7926 /*
7927 * [ WFC: Entity Declared ]
7928 * In a document without any DTD, a document with only an
7929 * internal DTD subset which contains no parameter entity
7930 * references, or a document with "standalone='yes'", the
7931 * Name given in the entity reference must match that in an
7932 * entity declaration, except that well-formed documents
7933 * need not declare any of the following entities: amp, lt,
7934 * gt, apos, quot.
7935 * The declaration of a parameter entity must precede any
7936 * reference to it.
7937 * Similarly, the declaration of a general entity must
7938 * precede any reference to it which appears in a default
7939 * value in an attribute-list declaration. Note that if
7940 * entities are declared in the external subset or in
7941 * external parameter entities, a non-validating processor
7942 * is not obligated to read and process their declarations;
7943 * for such documents, the rule that an entity must be
7944 * declared is a well-formedness constraint only if
7945 * standalone='yes'.
7946 */
7947 if (ent == NULL) {
7948 if ((ctxt->standalone == 1) ||
7949 ((ctxt->hasExternalSubset == 0) &&
7950 (ctxt->hasPErefs == 0))) {
7951 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7952 "Entity '%s' not defined\n", name);
7953 } else {
7954 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7955 "Entity '%s' not defined\n",
7956 name);
7957 }
7958 xmlParserEntityCheck(ctxt, 0, ent, 0);
7959 /* TODO ? check regressions ctxt->valid = 0; */
7960 }
7961
7962 /*
7963 * [ WFC: Parsed Entity ]
7964 * An entity reference must not contain the name of an
7965 * unparsed entity
7966 */
7967 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7969 "Entity reference to unparsed entity %s\n", name);
7970 }
7971
7972 /*
7973 * [ WFC: No External Entity References ]
7974 * Attribute values cannot contain direct or indirect
7975 * entity references to external entities.
7976 */
7977 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7978 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7979 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7980 "Attribute references external entity '%s'\n", name);
7981 }
7982 /*
7983 * [ WFC: No < in Attribute Values ]
7984 * The replacement text of any entity referred to directly or
7985 * indirectly in an attribute value (other than "<") must
7986 * not contain a <.
7987 */
7988 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7989 (ent != NULL) && (ent->content != NULL) &&
7990 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7991 (xmlStrchr(ent->content, '<'))) {
7992 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7993 "'<' in entity '%s' is not allowed in attributes values\n",
7994 name);
7995 }
7996
7997 /*
7998 * Internal check, no parameter entities here ...
7999 */
8000 else {
8001 switch (ent->etype) {
8002 case XML_INTERNAL_PARAMETER_ENTITY:
8003 case XML_EXTERNAL_PARAMETER_ENTITY:
8004 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8005 "Attempt to reference the parameter entity '%s'\n",
8006 name);
8007 break;
8008 default:
8009 break;
8010 }
8011 }
8012
8013 /*
8014 * [ WFC: No Recursion ]
8015 * A parsed entity must not contain a recursive reference
8016 * to itself, either directly or indirectly.
8017 * Done somewhere else
8018 */
8019
8020 xmlFree(name);
8021 *str = ptr;
8022 return(ent);
8023 }
8024
8025 /**
8026 * xmlParsePEReference:
8027 * @ctxt: an XML parser context
8028 *
8029 * parse PEReference declarations
8030 * The entity content is handled directly by pushing it's content as
8031 * a new input stream.
8032 *
8033 * [69] PEReference ::= '%' Name ';'
8034 *
8035 * [ WFC: No Recursion ]
8036 * A parsed entity must not contain a recursive
8037 * reference to itself, either directly or indirectly.
8038 *
8039 * [ WFC: Entity Declared ]
8040 * In a document without any DTD, a document with only an internal DTD
8041 * subset which contains no parameter entity references, or a document
8042 * with "standalone='yes'", ... ... The declaration of a parameter
8043 * entity must precede any reference to it...
8044 *
8045 * [ VC: Entity Declared ]
8046 * In a document with an external subset or external parameter entities
8047 * with "standalone='no'", ... ... The declaration of a parameter entity
8048 * must precede any reference to it...
8049 *
8050 * [ WFC: In DTD ]
8051 * Parameter-entity references may only appear in the DTD.
8052 * NOTE: misleading but this is handled.
8053 */
8054 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)8055 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8056 {
8057 const xmlChar *name;
8058 xmlEntityPtr entity = NULL;
8059 xmlParserInputPtr input;
8060
8061 if (RAW != '%')
8062 return;
8063 NEXT;
8064 name = xmlParseName(ctxt);
8065 if (name == NULL) {
8066 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8067 "xmlParsePEReference: no name\n");
8068 return;
8069 }
8070 if (RAW != ';') {
8071 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8072 return;
8073 }
8074
8075 NEXT;
8076
8077 /*
8078 * Increate the number of entity references parsed
8079 */
8080 ctxt->nbentities++;
8081
8082 /*
8083 * Request the entity from SAX
8084 */
8085 if ((ctxt->sax != NULL) &&
8086 (ctxt->sax->getParameterEntity != NULL))
8087 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8088 if (ctxt->instate == XML_PARSER_EOF)
8089 return;
8090 if (entity == NULL) {
8091 /*
8092 * [ WFC: Entity Declared ]
8093 * In a document without any DTD, a document with only an
8094 * internal DTD subset which contains no parameter entity
8095 * references, or a document with "standalone='yes'", ...
8096 * ... The declaration of a parameter entity must precede
8097 * any reference to it...
8098 */
8099 if ((ctxt->standalone == 1) ||
8100 ((ctxt->hasExternalSubset == 0) &&
8101 (ctxt->hasPErefs == 0))) {
8102 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8103 "PEReference: %%%s; not found\n",
8104 name);
8105 } else {
8106 /*
8107 * [ VC: Entity Declared ]
8108 * In a document with an external subset or external
8109 * parameter entities with "standalone='no'", ...
8110 * ... The declaration of a parameter entity must
8111 * precede any reference to it...
8112 */
8113 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8114 "PEReference: %%%s; not found\n",
8115 name, NULL);
8116 ctxt->valid = 0;
8117 }
8118 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8119 } else {
8120 /*
8121 * Internal checking in case the entity quest barfed
8122 */
8123 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8124 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8125 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8126 "Internal: %%%s; is not a parameter entity\n",
8127 name, NULL);
8128 } else if (ctxt->input->free != deallocblankswrapper) {
8129 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8130 if (xmlPushInput(ctxt, input) < 0)
8131 return;
8132 } else {
8133 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8134 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8135 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8136 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8137 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8138 (ctxt->replaceEntities == 0) &&
8139 (ctxt->validate == 0))
8140 return;
8141 /*
8142 * TODO !!!
8143 * handle the extra spaces added before and after
8144 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8145 */
8146 input = xmlNewEntityInputStream(ctxt, entity);
8147 if (xmlPushInput(ctxt, input) < 0)
8148 return;
8149 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8150 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8151 (IS_BLANK_CH(NXT(5)))) {
8152 xmlParseTextDecl(ctxt);
8153 if (ctxt->errNo ==
8154 XML_ERR_UNSUPPORTED_ENCODING) {
8155 /*
8156 * The XML REC instructs us to stop parsing
8157 * right here
8158 */
8159 xmlHaltParser(ctxt);
8160 return;
8161 }
8162 }
8163 }
8164 }
8165 ctxt->hasPErefs = 1;
8166 }
8167
8168 /**
8169 * xmlLoadEntityContent:
8170 * @ctxt: an XML parser context
8171 * @entity: an unloaded system entity
8172 *
8173 * Load the original content of the given system entity from the
8174 * ExternalID/SystemID given. This is to be used for Included in Literal
8175 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8176 *
8177 * Returns 0 in case of success and -1 in case of failure
8178 */
8179 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8180 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8181 xmlParserInputPtr input;
8182 xmlBufferPtr buf;
8183 int l, c;
8184 int count = 0;
8185
8186 if ((ctxt == NULL) || (entity == NULL) ||
8187 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8188 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8189 (entity->content != NULL)) {
8190 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8191 "xmlLoadEntityContent parameter error");
8192 return(-1);
8193 }
8194
8195 if (xmlParserDebugEntities)
8196 xmlGenericError(xmlGenericErrorContext,
8197 "Reading %s entity content input\n", entity->name);
8198
8199 buf = xmlBufferCreate();
8200 if (buf == NULL) {
8201 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8202 "xmlLoadEntityContent parameter error");
8203 return(-1);
8204 }
8205
8206 input = xmlNewEntityInputStream(ctxt, entity);
8207 if (input == NULL) {
8208 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8209 "xmlLoadEntityContent input error");
8210 xmlBufferFree(buf);
8211 return(-1);
8212 }
8213
8214 /*
8215 * Push the entity as the current input, read char by char
8216 * saving to the buffer until the end of the entity or an error
8217 */
8218 if (xmlPushInput(ctxt, input) < 0) {
8219 xmlBufferFree(buf);
8220 return(-1);
8221 }
8222
8223 GROW;
8224 c = CUR_CHAR(l);
8225 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8226 (IS_CHAR(c))) {
8227 xmlBufferAdd(buf, ctxt->input->cur, l);
8228 if (count++ > XML_PARSER_CHUNK_SIZE) {
8229 count = 0;
8230 GROW;
8231 if (ctxt->instate == XML_PARSER_EOF) {
8232 xmlBufferFree(buf);
8233 return(-1);
8234 }
8235 }
8236 NEXTL(l);
8237 c = CUR_CHAR(l);
8238 if (c == 0) {
8239 count = 0;
8240 GROW;
8241 if (ctxt->instate == XML_PARSER_EOF) {
8242 xmlBufferFree(buf);
8243 return(-1);
8244 }
8245 c = CUR_CHAR(l);
8246 }
8247 }
8248
8249 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8250 xmlPopInput(ctxt);
8251 } else if (!IS_CHAR(c)) {
8252 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8253 "xmlLoadEntityContent: invalid char value %d\n",
8254 c);
8255 xmlBufferFree(buf);
8256 return(-1);
8257 }
8258 entity->content = buf->content;
8259 buf->content = NULL;
8260 xmlBufferFree(buf);
8261
8262 return(0);
8263 }
8264
8265 /**
8266 * xmlParseStringPEReference:
8267 * @ctxt: an XML parser context
8268 * @str: a pointer to an index in the string
8269 *
8270 * parse PEReference declarations
8271 *
8272 * [69] PEReference ::= '%' Name ';'
8273 *
8274 * [ WFC: No Recursion ]
8275 * A parsed entity must not contain a recursive
8276 * reference to itself, either directly or indirectly.
8277 *
8278 * [ WFC: Entity Declared ]
8279 * In a document without any DTD, a document with only an internal DTD
8280 * subset which contains no parameter entity references, or a document
8281 * with "standalone='yes'", ... ... The declaration of a parameter
8282 * entity must precede any reference to it...
8283 *
8284 * [ VC: Entity Declared ]
8285 * In a document with an external subset or external parameter entities
8286 * with "standalone='no'", ... ... The declaration of a parameter entity
8287 * must precede any reference to it...
8288 *
8289 * [ WFC: In DTD ]
8290 * Parameter-entity references may only appear in the DTD.
8291 * NOTE: misleading but this is handled.
8292 *
8293 * Returns the string of the entity content.
8294 * str is updated to the current value of the index
8295 */
8296 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8297 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8298 const xmlChar *ptr;
8299 xmlChar cur;
8300 xmlChar *name;
8301 xmlEntityPtr entity = NULL;
8302
8303 if ((str == NULL) || (*str == NULL)) return(NULL);
8304 ptr = *str;
8305 cur = *ptr;
8306 if (cur != '%')
8307 return(NULL);
8308 ptr++;
8309 name = xmlParseStringName(ctxt, &ptr);
8310 if (name == NULL) {
8311 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8312 "xmlParseStringPEReference: no name\n");
8313 *str = ptr;
8314 return(NULL);
8315 }
8316 cur = *ptr;
8317 if (cur != ';') {
8318 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8319 xmlFree(name);
8320 *str = ptr;
8321 return(NULL);
8322 }
8323 ptr++;
8324
8325 /*
8326 * Increate the number of entity references parsed
8327 */
8328 ctxt->nbentities++;
8329
8330 /*
8331 * Request the entity from SAX
8332 */
8333 if ((ctxt->sax != NULL) &&
8334 (ctxt->sax->getParameterEntity != NULL))
8335 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8336 if (ctxt->instate == XML_PARSER_EOF) {
8337 xmlFree(name);
8338 return(NULL);
8339 }
8340 if (entity == NULL) {
8341 /*
8342 * [ WFC: Entity Declared ]
8343 * In a document without any DTD, a document with only an
8344 * internal DTD subset which contains no parameter entity
8345 * references, or a document with "standalone='yes'", ...
8346 * ... The declaration of a parameter entity must precede
8347 * any reference to it...
8348 */
8349 if ((ctxt->standalone == 1) ||
8350 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8351 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8352 "PEReference: %%%s; not found\n", name);
8353 } else {
8354 /*
8355 * [ VC: Entity Declared ]
8356 * In a document with an external subset or external
8357 * parameter entities with "standalone='no'", ...
8358 * ... The declaration of a parameter entity must
8359 * precede any reference to it...
8360 */
8361 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8362 "PEReference: %%%s; not found\n",
8363 name, NULL);
8364 ctxt->valid = 0;
8365 }
8366 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8367 } else {
8368 /*
8369 * Internal checking in case the entity quest barfed
8370 */
8371 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8372 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8373 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8374 "%%%s; is not a parameter entity\n",
8375 name, NULL);
8376 }
8377 }
8378 ctxt->hasPErefs = 1;
8379 xmlFree(name);
8380 *str = ptr;
8381 return(entity);
8382 }
8383
8384 /**
8385 * xmlParseDocTypeDecl:
8386 * @ctxt: an XML parser context
8387 *
8388 * parse a DOCTYPE declaration
8389 *
8390 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8391 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8392 *
8393 * [ VC: Root Element Type ]
8394 * The Name in the document type declaration must match the element
8395 * type of the root element.
8396 */
8397
8398 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8399 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8400 const xmlChar *name = NULL;
8401 xmlChar *ExternalID = NULL;
8402 xmlChar *URI = NULL;
8403
8404 /*
8405 * We know that '<!DOCTYPE' has been detected.
8406 */
8407 SKIP(9);
8408
8409 SKIP_BLANKS;
8410
8411 /*
8412 * Parse the DOCTYPE name.
8413 */
8414 name = xmlParseName(ctxt);
8415 if (name == NULL) {
8416 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8417 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8418 }
8419 ctxt->intSubName = name;
8420
8421 SKIP_BLANKS;
8422
8423 /*
8424 * Check for SystemID and ExternalID
8425 */
8426 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8427
8428 if ((URI != NULL) || (ExternalID != NULL)) {
8429 ctxt->hasExternalSubset = 1;
8430 }
8431 ctxt->extSubURI = URI;
8432 ctxt->extSubSystem = ExternalID;
8433
8434 SKIP_BLANKS;
8435
8436 /*
8437 * Create and update the internal subset.
8438 */
8439 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8440 (!ctxt->disableSAX))
8441 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8442 if (ctxt->instate == XML_PARSER_EOF)
8443 return;
8444
8445 /*
8446 * Is there any internal subset declarations ?
8447 * they are handled separately in xmlParseInternalSubset()
8448 */
8449 if (RAW == '[')
8450 return;
8451
8452 /*
8453 * We should be at the end of the DOCTYPE declaration.
8454 */
8455 if (RAW != '>') {
8456 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8457 }
8458 NEXT;
8459 }
8460
8461 /**
8462 * xmlParseInternalSubset:
8463 * @ctxt: an XML parser context
8464 *
8465 * parse the internal subset declaration
8466 *
8467 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8468 */
8469
8470 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8471 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8472 /*
8473 * Is there any DTD definition ?
8474 */
8475 if (RAW == '[') {
8476 ctxt->instate = XML_PARSER_DTD;
8477 NEXT;
8478 /*
8479 * Parse the succession of Markup declarations and
8480 * PEReferences.
8481 * Subsequence (markupdecl | PEReference | S)*
8482 */
8483 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8484 const xmlChar *check = CUR_PTR;
8485 unsigned int cons = ctxt->input->consumed;
8486
8487 SKIP_BLANKS;
8488 xmlParseMarkupDecl(ctxt);
8489 xmlParsePEReference(ctxt);
8490
8491 /*
8492 * Pop-up of finished entities.
8493 */
8494 while ((RAW == 0) && (ctxt->inputNr > 1))
8495 xmlPopInput(ctxt);
8496
8497 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8498 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8499 "xmlParseInternalSubset: error detected in Markup declaration\n");
8500 break;
8501 }
8502 }
8503 if (RAW == ']') {
8504 NEXT;
8505 SKIP_BLANKS;
8506 }
8507 }
8508
8509 /*
8510 * We should be at the end of the DOCTYPE declaration.
8511 */
8512 if (RAW != '>') {
8513 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8514 return;
8515 }
8516 NEXT;
8517 }
8518
8519 #ifdef LIBXML_SAX1_ENABLED
8520 /**
8521 * xmlParseAttribute:
8522 * @ctxt: an XML parser context
8523 * @value: a xmlChar ** used to store the value of the attribute
8524 *
8525 * parse an attribute
8526 *
8527 * [41] Attribute ::= Name Eq AttValue
8528 *
8529 * [ WFC: No External Entity References ]
8530 * Attribute values cannot contain direct or indirect entity references
8531 * to external entities.
8532 *
8533 * [ WFC: No < in Attribute Values ]
8534 * The replacement text of any entity referred to directly or indirectly in
8535 * an attribute value (other than "<") must not contain a <.
8536 *
8537 * [ VC: Attribute Value Type ]
8538 * The attribute must have been declared; the value must be of the type
8539 * declared for it.
8540 *
8541 * [25] Eq ::= S? '=' S?
8542 *
8543 * With namespace:
8544 *
8545 * [NS 11] Attribute ::= QName Eq AttValue
8546 *
8547 * Also the case QName == xmlns:??? is handled independently as a namespace
8548 * definition.
8549 *
8550 * Returns the attribute name, and the value in *value.
8551 */
8552
8553 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8554 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8555 const xmlChar *name;
8556 xmlChar *val;
8557
8558 *value = NULL;
8559 GROW;
8560 name = xmlParseName(ctxt);
8561 if (name == NULL) {
8562 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8563 "error parsing attribute name\n");
8564 return(NULL);
8565 }
8566
8567 /*
8568 * read the value
8569 */
8570 SKIP_BLANKS;
8571 if (RAW == '=') {
8572 NEXT;
8573 SKIP_BLANKS;
8574 val = xmlParseAttValue(ctxt);
8575 ctxt->instate = XML_PARSER_CONTENT;
8576 } else {
8577 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8578 "Specification mandate value for attribute %s\n", name);
8579 return(NULL);
8580 }
8581
8582 /*
8583 * Check that xml:lang conforms to the specification
8584 * No more registered as an error, just generate a warning now
8585 * since this was deprecated in XML second edition
8586 */
8587 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8588 if (!xmlCheckLanguageID(val)) {
8589 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8590 "Malformed value for xml:lang : %s\n",
8591 val, NULL);
8592 }
8593 }
8594
8595 /*
8596 * Check that xml:space conforms to the specification
8597 */
8598 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8599 if (xmlStrEqual(val, BAD_CAST "default"))
8600 *(ctxt->space) = 0;
8601 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8602 *(ctxt->space) = 1;
8603 else {
8604 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8605 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8606 val, NULL);
8607 }
8608 }
8609
8610 *value = val;
8611 return(name);
8612 }
8613
8614 /**
8615 * xmlParseStartTag:
8616 * @ctxt: an XML parser context
8617 *
8618 * parse a start of tag either for rule element or
8619 * EmptyElement. In both case we don't parse the tag closing chars.
8620 *
8621 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8622 *
8623 * [ WFC: Unique Att Spec ]
8624 * No attribute name may appear more than once in the same start-tag or
8625 * empty-element tag.
8626 *
8627 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8628 *
8629 * [ WFC: Unique Att Spec ]
8630 * No attribute name may appear more than once in the same start-tag or
8631 * empty-element tag.
8632 *
8633 * With namespace:
8634 *
8635 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8636 *
8637 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8638 *
8639 * Returns the element name parsed
8640 */
8641
8642 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8643 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8644 const xmlChar *name;
8645 const xmlChar *attname;
8646 xmlChar *attvalue;
8647 const xmlChar **atts = ctxt->atts;
8648 int nbatts = 0;
8649 int maxatts = ctxt->maxatts;
8650 int i;
8651
8652 if (RAW != '<') return(NULL);
8653 NEXT1;
8654
8655 name = xmlParseName(ctxt);
8656 if (name == NULL) {
8657 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8658 "xmlParseStartTag: invalid element name\n");
8659 return(NULL);
8660 }
8661
8662 /*
8663 * Now parse the attributes, it ends up with the ending
8664 *
8665 * (S Attribute)* S?
8666 */
8667 SKIP_BLANKS;
8668 GROW;
8669
8670 while (((RAW != '>') &&
8671 ((RAW != '/') || (NXT(1) != '>')) &&
8672 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8673 const xmlChar *q = CUR_PTR;
8674 unsigned int cons = ctxt->input->consumed;
8675
8676 attname = xmlParseAttribute(ctxt, &attvalue);
8677 if ((attname != NULL) && (attvalue != NULL)) {
8678 /*
8679 * [ WFC: Unique Att Spec ]
8680 * No attribute name may appear more than once in the same
8681 * start-tag or empty-element tag.
8682 */
8683 for (i = 0; i < nbatts;i += 2) {
8684 if (xmlStrEqual(atts[i], attname)) {
8685 xmlErrAttributeDup(ctxt, NULL, attname);
8686 xmlFree(attvalue);
8687 goto failed;
8688 }
8689 }
8690 /*
8691 * Add the pair to atts
8692 */
8693 if (atts == NULL) {
8694 maxatts = 22; /* allow for 10 attrs by default */
8695 atts = (const xmlChar **)
8696 xmlMalloc(maxatts * sizeof(xmlChar *));
8697 if (atts == NULL) {
8698 xmlErrMemory(ctxt, NULL);
8699 if (attvalue != NULL)
8700 xmlFree(attvalue);
8701 goto failed;
8702 }
8703 ctxt->atts = atts;
8704 ctxt->maxatts = maxatts;
8705 } else if (nbatts + 4 > maxatts) {
8706 const xmlChar **n;
8707
8708 maxatts *= 2;
8709 n = (const xmlChar **) xmlRealloc((void *) atts,
8710 maxatts * sizeof(const xmlChar *));
8711 if (n == NULL) {
8712 xmlErrMemory(ctxt, NULL);
8713 if (attvalue != NULL)
8714 xmlFree(attvalue);
8715 goto failed;
8716 }
8717 atts = n;
8718 ctxt->atts = atts;
8719 ctxt->maxatts = maxatts;
8720 }
8721 atts[nbatts++] = attname;
8722 atts[nbatts++] = attvalue;
8723 atts[nbatts] = NULL;
8724 atts[nbatts + 1] = NULL;
8725 } else {
8726 if (attvalue != NULL)
8727 xmlFree(attvalue);
8728 }
8729
8730 failed:
8731
8732 GROW
8733 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8734 break;
8735 if (!IS_BLANK_CH(RAW)) {
8736 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8737 "attributes construct error\n");
8738 }
8739 SKIP_BLANKS;
8740 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8741 (attname == NULL) && (attvalue == NULL)) {
8742 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8743 "xmlParseStartTag: problem parsing attributes\n");
8744 break;
8745 }
8746 SHRINK;
8747 GROW;
8748 }
8749
8750 /*
8751 * SAX: Start of Element !
8752 */
8753 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8754 (!ctxt->disableSAX)) {
8755 if (nbatts > 0)
8756 ctxt->sax->startElement(ctxt->userData, name, atts);
8757 else
8758 ctxt->sax->startElement(ctxt->userData, name, NULL);
8759 }
8760
8761 if (atts != NULL) {
8762 /* Free only the content strings */
8763 for (i = 1;i < nbatts;i+=2)
8764 if (atts[i] != NULL)
8765 xmlFree((xmlChar *) atts[i]);
8766 }
8767 return(name);
8768 }
8769
8770 /**
8771 * xmlParseEndTag1:
8772 * @ctxt: an XML parser context
8773 * @line: line of the start tag
8774 * @nsNr: number of namespaces on the start tag
8775 *
8776 * parse an end of tag
8777 *
8778 * [42] ETag ::= '</' Name S? '>'
8779 *
8780 * With namespace
8781 *
8782 * [NS 9] ETag ::= '</' QName S? '>'
8783 */
8784
8785 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8786 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8787 const xmlChar *name;
8788
8789 GROW;
8790 if ((RAW != '<') || (NXT(1) != '/')) {
8791 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8792 "xmlParseEndTag: '</' not found\n");
8793 return;
8794 }
8795 SKIP(2);
8796
8797 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8798
8799 /*
8800 * We should definitely be at the ending "S? '>'" part
8801 */
8802 GROW;
8803 SKIP_BLANKS;
8804 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8805 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8806 } else
8807 NEXT1;
8808
8809 /*
8810 * [ WFC: Element Type Match ]
8811 * The Name in an element's end-tag must match the element type in the
8812 * start-tag.
8813 *
8814 */
8815 if (name != (xmlChar*)1) {
8816 if (name == NULL) name = BAD_CAST "unparseable";
8817 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8818 "Opening and ending tag mismatch: %s line %d and %s\n",
8819 ctxt->name, line, name);
8820 }
8821
8822 /*
8823 * SAX: End of Tag
8824 */
8825 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8826 (!ctxt->disableSAX))
8827 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8828
8829 namePop(ctxt);
8830 spacePop(ctxt);
8831 return;
8832 }
8833
8834 /**
8835 * xmlParseEndTag:
8836 * @ctxt: an XML parser context
8837 *
8838 * parse an end of tag
8839 *
8840 * [42] ETag ::= '</' Name S? '>'
8841 *
8842 * With namespace
8843 *
8844 * [NS 9] ETag ::= '</' QName S? '>'
8845 */
8846
8847 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8848 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8849 xmlParseEndTag1(ctxt, 0);
8850 }
8851 #endif /* LIBXML_SAX1_ENABLED */
8852
8853 /************************************************************************
8854 * *
8855 * SAX 2 specific operations *
8856 * *
8857 ************************************************************************/
8858
8859 /*
8860 * xmlGetNamespace:
8861 * @ctxt: an XML parser context
8862 * @prefix: the prefix to lookup
8863 *
8864 * Lookup the namespace name for the @prefix (which ca be NULL)
8865 * The prefix must come from the @ctxt->dict dictionary
8866 *
8867 * Returns the namespace name or NULL if not bound
8868 */
8869 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8870 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8871 int i;
8872
8873 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8874 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8875 if (ctxt->nsTab[i] == prefix) {
8876 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8877 return(NULL);
8878 return(ctxt->nsTab[i + 1]);
8879 }
8880 return(NULL);
8881 }
8882
8883 /**
8884 * xmlParseQName:
8885 * @ctxt: an XML parser context
8886 * @prefix: pointer to store the prefix part
8887 *
8888 * parse an XML Namespace QName
8889 *
8890 * [6] QName ::= (Prefix ':')? LocalPart
8891 * [7] Prefix ::= NCName
8892 * [8] LocalPart ::= NCName
8893 *
8894 * Returns the Name parsed or NULL
8895 */
8896
8897 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8898 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8899 const xmlChar *l, *p;
8900
8901 GROW;
8902
8903 l = xmlParseNCName(ctxt);
8904 if (l == NULL) {
8905 if (CUR == ':') {
8906 l = xmlParseName(ctxt);
8907 if (l != NULL) {
8908 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8909 "Failed to parse QName '%s'\n", l, NULL, NULL);
8910 *prefix = NULL;
8911 return(l);
8912 }
8913 }
8914 return(NULL);
8915 }
8916 if (CUR == ':') {
8917 NEXT;
8918 p = l;
8919 l = xmlParseNCName(ctxt);
8920 if (l == NULL) {
8921 xmlChar *tmp;
8922
8923 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8924 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8925 l = xmlParseNmtoken(ctxt);
8926 if (l == NULL)
8927 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8928 else {
8929 tmp = xmlBuildQName(l, p, NULL, 0);
8930 xmlFree((char *)l);
8931 }
8932 p = xmlDictLookup(ctxt->dict, tmp, -1);
8933 if (tmp != NULL) xmlFree(tmp);
8934 *prefix = NULL;
8935 return(p);
8936 }
8937 if (CUR == ':') {
8938 xmlChar *tmp;
8939
8940 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8941 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8942 NEXT;
8943 tmp = (xmlChar *) xmlParseName(ctxt);
8944 if (tmp != NULL) {
8945 tmp = xmlBuildQName(tmp, l, NULL, 0);
8946 l = xmlDictLookup(ctxt->dict, tmp, -1);
8947 if (tmp != NULL) xmlFree(tmp);
8948 *prefix = p;
8949 return(l);
8950 }
8951 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8952 l = xmlDictLookup(ctxt->dict, tmp, -1);
8953 if (tmp != NULL) xmlFree(tmp);
8954 *prefix = p;
8955 return(l);
8956 }
8957 *prefix = p;
8958 } else
8959 *prefix = NULL;
8960 return(l);
8961 }
8962
8963 /**
8964 * xmlParseQNameAndCompare:
8965 * @ctxt: an XML parser context
8966 * @name: the localname
8967 * @prefix: the prefix, if any.
8968 *
8969 * parse an XML name and compares for match
8970 * (specialized for endtag parsing)
8971 *
8972 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8973 * and the name for mismatch
8974 */
8975
8976 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8977 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8978 xmlChar const *prefix) {
8979 const xmlChar *cmp;
8980 const xmlChar *in;
8981 const xmlChar *ret;
8982 const xmlChar *prefix2;
8983
8984 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8985
8986 GROW;
8987 in = ctxt->input->cur;
8988
8989 cmp = prefix;
8990 while (*in != 0 && *in == *cmp) {
8991 ++in;
8992 ++cmp;
8993 }
8994 if ((*cmp == 0) && (*in == ':')) {
8995 in++;
8996 cmp = name;
8997 while (*in != 0 && *in == *cmp) {
8998 ++in;
8999 ++cmp;
9000 }
9001 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
9002 /* success */
9003 ctxt->input->cur = in;
9004 return((const xmlChar*) 1);
9005 }
9006 }
9007 /*
9008 * all strings coms from the dictionary, equality can be done directly
9009 */
9010 ret = xmlParseQName (ctxt, &prefix2);
9011 if ((ret == name) && (prefix == prefix2))
9012 return((const xmlChar*) 1);
9013 return ret;
9014 }
9015
9016 /**
9017 * xmlParseAttValueInternal:
9018 * @ctxt: an XML parser context
9019 * @len: attribute len result
9020 * @alloc: whether the attribute was reallocated as a new string
9021 * @normalize: if 1 then further non-CDATA normalization must be done
9022 *
9023 * parse a value for an attribute.
9024 * NOTE: if no normalization is needed, the routine will return pointers
9025 * directly from the data buffer.
9026 *
9027 * 3.3.3 Attribute-Value Normalization:
9028 * Before the value of an attribute is passed to the application or
9029 * checked for validity, the XML processor must normalize it as follows:
9030 * - a character reference is processed by appending the referenced
9031 * character to the attribute value
9032 * - an entity reference is processed by recursively processing the
9033 * replacement text of the entity
9034 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9035 * appending #x20 to the normalized value, except that only a single
9036 * #x20 is appended for a "#xD#xA" sequence that is part of an external
9037 * parsed entity or the literal entity value of an internal parsed entity
9038 * - other characters are processed by appending them to the normalized value
9039 * If the declared value is not CDATA, then the XML processor must further
9040 * process the normalized attribute value by discarding any leading and
9041 * trailing space (#x20) characters, and by replacing sequences of space
9042 * (#x20) characters by a single space (#x20) character.
9043 * All attributes for which no declaration has been read should be treated
9044 * by a non-validating parser as if declared CDATA.
9045 *
9046 * Returns the AttValue parsed or NULL. The value has to be freed by the
9047 * caller if it was copied, this can be detected by val[*len] == 0.
9048 */
9049
9050 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)9051 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9052 int normalize)
9053 {
9054 xmlChar limit = 0;
9055 const xmlChar *in = NULL, *start, *end, *last;
9056 xmlChar *ret = NULL;
9057 int line, col;
9058
9059 GROW;
9060 in = (xmlChar *) CUR_PTR;
9061 line = ctxt->input->line;
9062 col = ctxt->input->col;
9063 if (*in != '"' && *in != '\'') {
9064 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9065 return (NULL);
9066 }
9067 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9068
9069 /*
9070 * try to handle in this routine the most common case where no
9071 * allocation of a new string is required and where content is
9072 * pure ASCII.
9073 */
9074 limit = *in++;
9075 col++;
9076 end = ctxt->input->end;
9077 start = in;
9078 if (in >= end) {
9079 const xmlChar *oldbase = ctxt->input->base;
9080 GROW;
9081 if (oldbase != ctxt->input->base) {
9082 long delta = ctxt->input->base - oldbase;
9083 start = start + delta;
9084 in = in + delta;
9085 }
9086 end = ctxt->input->end;
9087 }
9088 if (normalize) {
9089 /*
9090 * Skip any leading spaces
9091 */
9092 while ((in < end) && (*in != limit) &&
9093 ((*in == 0x20) || (*in == 0x9) ||
9094 (*in == 0xA) || (*in == 0xD))) {
9095 if (*in == 0xA) {
9096 line++; col = 1;
9097 } else {
9098 col++;
9099 }
9100 in++;
9101 start = in;
9102 if (in >= end) {
9103 const xmlChar *oldbase = ctxt->input->base;
9104 GROW;
9105 if (ctxt->instate == XML_PARSER_EOF)
9106 return(NULL);
9107 if (oldbase != ctxt->input->base) {
9108 long delta = ctxt->input->base - oldbase;
9109 start = start + delta;
9110 in = in + delta;
9111 }
9112 end = ctxt->input->end;
9113 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9114 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9116 "AttValue length too long\n");
9117 return(NULL);
9118 }
9119 }
9120 }
9121 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9122 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9123 col++;
9124 if ((*in++ == 0x20) && (*in == 0x20)) break;
9125 if (in >= end) {
9126 const xmlChar *oldbase = ctxt->input->base;
9127 GROW;
9128 if (ctxt->instate == XML_PARSER_EOF)
9129 return(NULL);
9130 if (oldbase != ctxt->input->base) {
9131 long delta = ctxt->input->base - oldbase;
9132 start = start + delta;
9133 in = in + delta;
9134 }
9135 end = ctxt->input->end;
9136 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9137 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9138 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9139 "AttValue length too long\n");
9140 return(NULL);
9141 }
9142 }
9143 }
9144 last = in;
9145 /*
9146 * skip the trailing blanks
9147 */
9148 while ((last[-1] == 0x20) && (last > start)) last--;
9149 while ((in < end) && (*in != limit) &&
9150 ((*in == 0x20) || (*in == 0x9) ||
9151 (*in == 0xA) || (*in == 0xD))) {
9152 if (*in == 0xA) {
9153 line++, col = 1;
9154 } else {
9155 col++;
9156 }
9157 in++;
9158 if (in >= end) {
9159 const xmlChar *oldbase = ctxt->input->base;
9160 GROW;
9161 if (ctxt->instate == XML_PARSER_EOF)
9162 return(NULL);
9163 if (oldbase != ctxt->input->base) {
9164 long delta = ctxt->input->base - oldbase;
9165 start = start + delta;
9166 in = in + delta;
9167 last = last + delta;
9168 }
9169 end = ctxt->input->end;
9170 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9171 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9172 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173 "AttValue length too long\n");
9174 return(NULL);
9175 }
9176 }
9177 }
9178 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9179 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9180 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181 "AttValue length too long\n");
9182 return(NULL);
9183 }
9184 if (*in != limit) goto need_complex;
9185 } else {
9186 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9187 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9188 in++;
9189 col++;
9190 if (in >= end) {
9191 const xmlChar *oldbase = ctxt->input->base;
9192 GROW;
9193 if (ctxt->instate == XML_PARSER_EOF)
9194 return(NULL);
9195 if (oldbase != ctxt->input->base) {
9196 long delta = ctxt->input->base - oldbase;
9197 start = start + delta;
9198 in = in + delta;
9199 }
9200 end = ctxt->input->end;
9201 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9202 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9203 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9204 "AttValue length too long\n");
9205 return(NULL);
9206 }
9207 }
9208 }
9209 last = in;
9210 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9211 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9212 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9213 "AttValue length too long\n");
9214 return(NULL);
9215 }
9216 if (*in != limit) goto need_complex;
9217 }
9218 in++;
9219 col++;
9220 if (len != NULL) {
9221 *len = last - start;
9222 ret = (xmlChar *) start;
9223 } else {
9224 if (alloc) *alloc = 1;
9225 ret = xmlStrndup(start, last - start);
9226 }
9227 CUR_PTR = in;
9228 ctxt->input->line = line;
9229 ctxt->input->col = col;
9230 if (alloc) *alloc = 0;
9231 return ret;
9232 need_complex:
9233 if (alloc) *alloc = 1;
9234 return xmlParseAttValueComplex(ctxt, len, normalize);
9235 }
9236
9237 /**
9238 * xmlParseAttribute2:
9239 * @ctxt: an XML parser context
9240 * @pref: the element prefix
9241 * @elem: the element name
9242 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9243 * @value: a xmlChar ** used to store the value of the attribute
9244 * @len: an int * to save the length of the attribute
9245 * @alloc: an int * to indicate if the attribute was allocated
9246 *
9247 * parse an attribute in the new SAX2 framework.
9248 *
9249 * Returns the attribute name, and the value in *value, .
9250 */
9251
9252 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9253 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9254 const xmlChar * pref, const xmlChar * elem,
9255 const xmlChar ** prefix, xmlChar ** value,
9256 int *len, int *alloc)
9257 {
9258 const xmlChar *name;
9259 xmlChar *val, *internal_val = NULL;
9260 int normalize = 0;
9261
9262 *value = NULL;
9263 GROW;
9264 name = xmlParseQName(ctxt, prefix);
9265 if (name == NULL) {
9266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9267 "error parsing attribute name\n");
9268 return (NULL);
9269 }
9270
9271 /*
9272 * get the type if needed
9273 */
9274 if (ctxt->attsSpecial != NULL) {
9275 int type;
9276
9277 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9278 pref, elem, *prefix, name);
9279 if (type != 0)
9280 normalize = 1;
9281 }
9282
9283 /*
9284 * read the value
9285 */
9286 SKIP_BLANKS;
9287 if (RAW == '=') {
9288 NEXT;
9289 SKIP_BLANKS;
9290 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9291 if (normalize) {
9292 /*
9293 * Sometimes a second normalisation pass for spaces is needed
9294 * but that only happens if charrefs or entities refernces
9295 * have been used in the attribute value, i.e. the attribute
9296 * value have been extracted in an allocated string already.
9297 */
9298 if (*alloc) {
9299 const xmlChar *val2;
9300
9301 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9302 if ((val2 != NULL) && (val2 != val)) {
9303 xmlFree(val);
9304 val = (xmlChar *) val2;
9305 }
9306 }
9307 }
9308 ctxt->instate = XML_PARSER_CONTENT;
9309 } else {
9310 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9311 "Specification mandate value for attribute %s\n",
9312 name);
9313 return (NULL);
9314 }
9315
9316 if (*prefix == ctxt->str_xml) {
9317 /*
9318 * Check that xml:lang conforms to the specification
9319 * No more registered as an error, just generate a warning now
9320 * since this was deprecated in XML second edition
9321 */
9322 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9323 internal_val = xmlStrndup(val, *len);
9324 if (!xmlCheckLanguageID(internal_val)) {
9325 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9326 "Malformed value for xml:lang : %s\n",
9327 internal_val, NULL);
9328 }
9329 }
9330
9331 /*
9332 * Check that xml:space conforms to the specification
9333 */
9334 if (xmlStrEqual(name, BAD_CAST "space")) {
9335 internal_val = xmlStrndup(val, *len);
9336 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9337 *(ctxt->space) = 0;
9338 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9339 *(ctxt->space) = 1;
9340 else {
9341 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9342 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9343 internal_val, NULL);
9344 }
9345 }
9346 if (internal_val) {
9347 xmlFree(internal_val);
9348 }
9349 }
9350
9351 *value = val;
9352 return (name);
9353 }
9354 /**
9355 * xmlParseStartTag2:
9356 * @ctxt: an XML parser context
9357 *
9358 * parse a start of tag either for rule element or
9359 * EmptyElement. In both case we don't parse the tag closing chars.
9360 * This routine is called when running SAX2 parsing
9361 *
9362 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9363 *
9364 * [ WFC: Unique Att Spec ]
9365 * No attribute name may appear more than once in the same start-tag or
9366 * empty-element tag.
9367 *
9368 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9369 *
9370 * [ WFC: Unique Att Spec ]
9371 * No attribute name may appear more than once in the same start-tag or
9372 * empty-element tag.
9373 *
9374 * With namespace:
9375 *
9376 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9377 *
9378 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9379 *
9380 * Returns the element name parsed
9381 */
9382
9383 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9384 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9385 const xmlChar **URI, int *tlen) {
9386 const xmlChar *localname;
9387 const xmlChar *prefix;
9388 const xmlChar *attname;
9389 const xmlChar *aprefix;
9390 const xmlChar *nsname;
9391 xmlChar *attvalue;
9392 const xmlChar **atts = ctxt->atts;
9393 int maxatts = ctxt->maxatts;
9394 int nratts, nbatts, nbdef;
9395 int i, j, nbNs, attval, oldline, oldcol, inputNr;
9396 const xmlChar *base;
9397 unsigned long cur;
9398 int nsNr = ctxt->nsNr;
9399
9400 if (RAW != '<') return(NULL);
9401 NEXT1;
9402
9403 /*
9404 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9405 * point since the attribute values may be stored as pointers to
9406 * the buffer and calling SHRINK would destroy them !
9407 * The Shrinking is only possible once the full set of attribute
9408 * callbacks have been done.
9409 */
9410 reparse:
9411 SHRINK;
9412 base = ctxt->input->base;
9413 cur = ctxt->input->cur - ctxt->input->base;
9414 inputNr = ctxt->inputNr;
9415 oldline = ctxt->input->line;
9416 oldcol = ctxt->input->col;
9417 nbatts = 0;
9418 nratts = 0;
9419 nbdef = 0;
9420 nbNs = 0;
9421 attval = 0;
9422 /* Forget any namespaces added during an earlier parse of this element. */
9423 ctxt->nsNr = nsNr;
9424
9425 localname = xmlParseQName(ctxt, &prefix);
9426 if (localname == NULL) {
9427 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9428 "StartTag: invalid element name\n");
9429 return(NULL);
9430 }
9431 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9432
9433 /*
9434 * Now parse the attributes, it ends up with the ending
9435 *
9436 * (S Attribute)* S?
9437 */
9438 SKIP_BLANKS;
9439 GROW;
9440 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9441 goto base_changed;
9442
9443 while (((RAW != '>') &&
9444 ((RAW != '/') || (NXT(1) != '>')) &&
9445 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9446 const xmlChar *q = CUR_PTR;
9447 unsigned int cons = ctxt->input->consumed;
9448 int len = -1, alloc = 0;
9449
9450 attname = xmlParseAttribute2(ctxt, prefix, localname,
9451 &aprefix, &attvalue, &len, &alloc);
9452 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
9453 if ((attvalue != NULL) && (alloc != 0))
9454 xmlFree(attvalue);
9455 attvalue = NULL;
9456 goto base_changed;
9457 }
9458 if ((attname != NULL) && (attvalue != NULL)) {
9459 if (len < 0) len = xmlStrlen(attvalue);
9460 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9461 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9462 xmlURIPtr uri;
9463
9464 if (URL == NULL) {
9465 xmlErrMemory(ctxt, "dictionary allocation failure");
9466 if ((attvalue != NULL) && (alloc != 0))
9467 xmlFree(attvalue);
9468 return(NULL);
9469 }
9470 if (*URL != 0) {
9471 uri = xmlParseURI((const char *) URL);
9472 if (uri == NULL) {
9473 xmlNsErr(ctxt, XML_WAR_NS_URI,
9474 "xmlns: '%s' is not a valid URI\n",
9475 URL, NULL, NULL);
9476 } else {
9477 if (uri->scheme == NULL) {
9478 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9479 "xmlns: URI %s is not absolute\n",
9480 URL, NULL, NULL);
9481 }
9482 xmlFreeURI(uri);
9483 }
9484 if (URL == ctxt->str_xml_ns) {
9485 if (attname != ctxt->str_xml) {
9486 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9487 "xml namespace URI cannot be the default namespace\n",
9488 NULL, NULL, NULL);
9489 }
9490 goto skip_default_ns;
9491 }
9492 if ((len == 29) &&
9493 (xmlStrEqual(URL,
9494 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9495 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9496 "reuse of the xmlns namespace name is forbidden\n",
9497 NULL, NULL, NULL);
9498 goto skip_default_ns;
9499 }
9500 }
9501 /*
9502 * check that it's not a defined namespace
9503 */
9504 for (j = 1;j <= nbNs;j++)
9505 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9506 break;
9507 if (j <= nbNs)
9508 xmlErrAttributeDup(ctxt, NULL, attname);
9509 else
9510 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9511 skip_default_ns:
9512 if ((attvalue != NULL) && (alloc != 0)) {
9513 xmlFree(attvalue);
9514 attvalue = NULL;
9515 }
9516 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9517 break;
9518 if (!IS_BLANK_CH(RAW)) {
9519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9520 "attributes construct error\n");
9521 break;
9522 }
9523 SKIP_BLANKS;
9524 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9525 goto base_changed;
9526 continue;
9527 }
9528 if (aprefix == ctxt->str_xmlns) {
9529 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9530 xmlURIPtr uri;
9531
9532 if (attname == ctxt->str_xml) {
9533 if (URL != ctxt->str_xml_ns) {
9534 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9535 "xml namespace prefix mapped to wrong URI\n",
9536 NULL, NULL, NULL);
9537 }
9538 /*
9539 * Do not keep a namespace definition node
9540 */
9541 goto skip_ns;
9542 }
9543 if (URL == ctxt->str_xml_ns) {
9544 if (attname != ctxt->str_xml) {
9545 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9546 "xml namespace URI mapped to wrong prefix\n",
9547 NULL, NULL, NULL);
9548 }
9549 goto skip_ns;
9550 }
9551 if (attname == ctxt->str_xmlns) {
9552 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9553 "redefinition of the xmlns prefix is forbidden\n",
9554 NULL, NULL, NULL);
9555 goto skip_ns;
9556 }
9557 if ((len == 29) &&
9558 (xmlStrEqual(URL,
9559 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9560 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9561 "reuse of the xmlns namespace name is forbidden\n",
9562 NULL, NULL, NULL);
9563 goto skip_ns;
9564 }
9565 if ((URL == NULL) || (URL[0] == 0)) {
9566 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9567 "xmlns:%s: Empty XML namespace is not allowed\n",
9568 attname, NULL, NULL);
9569 goto skip_ns;
9570 } else {
9571 uri = xmlParseURI((const char *) URL);
9572 if (uri == NULL) {
9573 xmlNsErr(ctxt, XML_WAR_NS_URI,
9574 "xmlns:%s: '%s' is not a valid URI\n",
9575 attname, URL, NULL);
9576 } else {
9577 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9578 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9579 "xmlns:%s: URI %s is not absolute\n",
9580 attname, URL, NULL);
9581 }
9582 xmlFreeURI(uri);
9583 }
9584 }
9585
9586 /*
9587 * check that it's not a defined namespace
9588 */
9589 for (j = 1;j <= nbNs;j++)
9590 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9591 break;
9592 if (j <= nbNs)
9593 xmlErrAttributeDup(ctxt, aprefix, attname);
9594 else
9595 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9596 skip_ns:
9597 if ((attvalue != NULL) && (alloc != 0)) {
9598 xmlFree(attvalue);
9599 attvalue = NULL;
9600 }
9601 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9602 break;
9603 if (!IS_BLANK_CH(RAW)) {
9604 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9605 "attributes construct error\n");
9606 break;
9607 }
9608 SKIP_BLANKS;
9609 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9610 goto base_changed;
9611 continue;
9612 }
9613
9614 /*
9615 * Add the pair to atts
9616 */
9617 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9618 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9619 if (attvalue[len] == 0)
9620 xmlFree(attvalue);
9621 goto failed;
9622 }
9623 maxatts = ctxt->maxatts;
9624 atts = ctxt->atts;
9625 }
9626 ctxt->attallocs[nratts++] = alloc;
9627 atts[nbatts++] = attname;
9628 atts[nbatts++] = aprefix;
9629 atts[nbatts++] = NULL; /* the URI will be fetched later */
9630 atts[nbatts++] = attvalue;
9631 attvalue += len;
9632 atts[nbatts++] = attvalue;
9633 /*
9634 * tag if some deallocation is needed
9635 */
9636 if (alloc != 0) attval = 1;
9637 } else {
9638 if ((attvalue != NULL) && (attvalue[len] == 0))
9639 xmlFree(attvalue);
9640 }
9641
9642 failed:
9643
9644 GROW
9645 if (ctxt->instate == XML_PARSER_EOF)
9646 break;
9647 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9648 goto base_changed;
9649 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9650 break;
9651 if (!IS_BLANK_CH(RAW)) {
9652 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9653 "attributes construct error\n");
9654 break;
9655 }
9656 SKIP_BLANKS;
9657 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9658 (attname == NULL) && (attvalue == NULL)) {
9659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9660 "xmlParseStartTag: problem parsing attributes\n");
9661 break;
9662 }
9663 GROW;
9664 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9665 goto base_changed;
9666 }
9667
9668 /*
9669 * The attributes defaulting
9670 */
9671 if (ctxt->attsDefault != NULL) {
9672 xmlDefAttrsPtr defaults;
9673
9674 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9675 if (defaults != NULL) {
9676 for (i = 0;i < defaults->nbAttrs;i++) {
9677 attname = defaults->values[5 * i];
9678 aprefix = defaults->values[5 * i + 1];
9679
9680 /*
9681 * special work for namespaces defaulted defs
9682 */
9683 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9684 /*
9685 * check that it's not a defined namespace
9686 */
9687 for (j = 1;j <= nbNs;j++)
9688 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9689 break;
9690 if (j <= nbNs) continue;
9691
9692 nsname = xmlGetNamespace(ctxt, NULL);
9693 if (nsname != defaults->values[5 * i + 2]) {
9694 if (nsPush(ctxt, NULL,
9695 defaults->values[5 * i + 2]) > 0)
9696 nbNs++;
9697 }
9698 } else if (aprefix == ctxt->str_xmlns) {
9699 /*
9700 * check that it's not a defined namespace
9701 */
9702 for (j = 1;j <= nbNs;j++)
9703 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9704 break;
9705 if (j <= nbNs) continue;
9706
9707 nsname = xmlGetNamespace(ctxt, attname);
9708 if (nsname != defaults->values[2]) {
9709 if (nsPush(ctxt, attname,
9710 defaults->values[5 * i + 2]) > 0)
9711 nbNs++;
9712 }
9713 } else {
9714 /*
9715 * check that it's not a defined attribute
9716 */
9717 for (j = 0;j < nbatts;j+=5) {
9718 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9719 break;
9720 }
9721 if (j < nbatts) continue;
9722
9723 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9724 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9725 return(NULL);
9726 }
9727 maxatts = ctxt->maxatts;
9728 atts = ctxt->atts;
9729 }
9730 atts[nbatts++] = attname;
9731 atts[nbatts++] = aprefix;
9732 if (aprefix == NULL)
9733 atts[nbatts++] = NULL;
9734 else
9735 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9736 atts[nbatts++] = defaults->values[5 * i + 2];
9737 atts[nbatts++] = defaults->values[5 * i + 3];
9738 if ((ctxt->standalone == 1) &&
9739 (defaults->values[5 * i + 4] != NULL)) {
9740 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9741 "standalone: attribute %s on %s defaulted from external subset\n",
9742 attname, localname);
9743 }
9744 nbdef++;
9745 }
9746 }
9747 }
9748 }
9749
9750 /*
9751 * The attributes checkings
9752 */
9753 for (i = 0; i < nbatts;i += 5) {
9754 /*
9755 * The default namespace does not apply to attribute names.
9756 */
9757 if (atts[i + 1] != NULL) {
9758 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9759 if (nsname == NULL) {
9760 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9761 "Namespace prefix %s for %s on %s is not defined\n",
9762 atts[i + 1], atts[i], localname);
9763 }
9764 atts[i + 2] = nsname;
9765 } else
9766 nsname = NULL;
9767 /*
9768 * [ WFC: Unique Att Spec ]
9769 * No attribute name may appear more than once in the same
9770 * start-tag or empty-element tag.
9771 * As extended by the Namespace in XML REC.
9772 */
9773 for (j = 0; j < i;j += 5) {
9774 if (atts[i] == atts[j]) {
9775 if (atts[i+1] == atts[j+1]) {
9776 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9777 break;
9778 }
9779 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9780 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9781 "Namespaced Attribute %s in '%s' redefined\n",
9782 atts[i], nsname, NULL);
9783 break;
9784 }
9785 }
9786 }
9787 }
9788
9789 nsname = xmlGetNamespace(ctxt, prefix);
9790 if ((prefix != NULL) && (nsname == NULL)) {
9791 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9792 "Namespace prefix %s on %s is not defined\n",
9793 prefix, localname, NULL);
9794 }
9795 *pref = prefix;
9796 *URI = nsname;
9797
9798 /*
9799 * SAX: Start of Element !
9800 */
9801 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9802 (!ctxt->disableSAX)) {
9803 if (nbNs > 0)
9804 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9805 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9806 nbatts / 5, nbdef, atts);
9807 else
9808 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9809 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9810 }
9811
9812 /*
9813 * Free up attribute allocated strings if needed
9814 */
9815 if (attval != 0) {
9816 for (i = 3,j = 0; j < nratts;i += 5,j++)
9817 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9818 xmlFree((xmlChar *) atts[i]);
9819 }
9820
9821 return(localname);
9822
9823 base_changed:
9824 /*
9825 * the attribute strings are valid iif the base didn't changed
9826 */
9827 if (attval != 0) {
9828 for (i = 3,j = 0; j < nratts;i += 5,j++)
9829 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9830 xmlFree((xmlChar *) atts[i]);
9831 }
9832
9833 /*
9834 * We can't switch from one entity to another in the middle
9835 * of a start tag
9836 */
9837 if (inputNr != ctxt->inputNr) {
9838 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9839 "Start tag doesn't start and stop in the same entity\n");
9840 return(NULL);
9841 }
9842
9843 ctxt->input->cur = ctxt->input->base + cur;
9844 ctxt->input->line = oldline;
9845 ctxt->input->col = oldcol;
9846 if (ctxt->wellFormed == 1) {
9847 goto reparse;
9848 }
9849 return(NULL);
9850 }
9851
9852 /**
9853 * xmlParseEndTag2:
9854 * @ctxt: an XML parser context
9855 * @line: line of the start tag
9856 * @nsNr: number of namespaces on the start tag
9857 *
9858 * parse an end of tag
9859 *
9860 * [42] ETag ::= '</' Name S? '>'
9861 *
9862 * With namespace
9863 *
9864 * [NS 9] ETag ::= '</' QName S? '>'
9865 */
9866
9867 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9868 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9869 const xmlChar *URI, int line, int nsNr, int tlen) {
9870 const xmlChar *name;
9871 size_t curLength;
9872
9873 GROW;
9874 if ((RAW != '<') || (NXT(1) != '/')) {
9875 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9876 return;
9877 }
9878 SKIP(2);
9879
9880 curLength = ctxt->input->end - ctxt->input->cur;
9881 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9882 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9883 if ((curLength >= (size_t)(tlen + 1)) &&
9884 (ctxt->input->cur[tlen] == '>')) {
9885 ctxt->input->cur += tlen + 1;
9886 ctxt->input->col += tlen + 1;
9887 goto done;
9888 }
9889 ctxt->input->cur += tlen;
9890 ctxt->input->col += tlen;
9891 name = (xmlChar*)1;
9892 } else {
9893 if (prefix == NULL)
9894 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9895 else
9896 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9897 }
9898
9899 /*
9900 * We should definitely be at the ending "S? '>'" part
9901 */
9902 GROW;
9903 if (ctxt->instate == XML_PARSER_EOF)
9904 return;
9905 SKIP_BLANKS;
9906 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9907 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9908 } else
9909 NEXT1;
9910
9911 /*
9912 * [ WFC: Element Type Match ]
9913 * The Name in an element's end-tag must match the element type in the
9914 * start-tag.
9915 *
9916 */
9917 if (name != (xmlChar*)1) {
9918 if (name == NULL) name = BAD_CAST "unparseable";
9919 if ((line == 0) && (ctxt->node != NULL))
9920 line = ctxt->node->line;
9921 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9922 "Opening and ending tag mismatch: %s line %d and %s\n",
9923 ctxt->name, line, name);
9924 }
9925
9926 /*
9927 * SAX: End of Tag
9928 */
9929 done:
9930 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9931 (!ctxt->disableSAX))
9932 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9933
9934 spacePop(ctxt);
9935 if (nsNr != 0)
9936 nsPop(ctxt, nsNr);
9937 return;
9938 }
9939
9940 /**
9941 * xmlParseCDSect:
9942 * @ctxt: an XML parser context
9943 *
9944 * Parse escaped pure raw content.
9945 *
9946 * [18] CDSect ::= CDStart CData CDEnd
9947 *
9948 * [19] CDStart ::= '<![CDATA['
9949 *
9950 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9951 *
9952 * [21] CDEnd ::= ']]>'
9953 */
9954 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9955 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9956 xmlChar *buf = NULL;
9957 int len = 0;
9958 int size = XML_PARSER_BUFFER_SIZE;
9959 int r, rl;
9960 int s, sl;
9961 int cur, l;
9962 int count = 0;
9963
9964 /* Check 2.6.0 was NXT(0) not RAW */
9965 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9966 SKIP(9);
9967 } else
9968 return;
9969
9970 ctxt->instate = XML_PARSER_CDATA_SECTION;
9971 r = CUR_CHAR(rl);
9972 if (!IS_CHAR(r)) {
9973 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9974 ctxt->instate = XML_PARSER_CONTENT;
9975 return;
9976 }
9977 NEXTL(rl);
9978 s = CUR_CHAR(sl);
9979 if (!IS_CHAR(s)) {
9980 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9981 ctxt->instate = XML_PARSER_CONTENT;
9982 return;
9983 }
9984 NEXTL(sl);
9985 cur = CUR_CHAR(l);
9986 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9987 if (buf == NULL) {
9988 xmlErrMemory(ctxt, NULL);
9989 return;
9990 }
9991 while (IS_CHAR(cur) &&
9992 ((r != ']') || (s != ']') || (cur != '>'))) {
9993 if (len + 5 >= size) {
9994 xmlChar *tmp;
9995
9996 if ((size > XML_MAX_TEXT_LENGTH) &&
9997 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9998 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9999 "CData section too big found", NULL);
10000 xmlFree (buf);
10001 return;
10002 }
10003 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
10004 if (tmp == NULL) {
10005 xmlFree(buf);
10006 xmlErrMemory(ctxt, NULL);
10007 return;
10008 }
10009 buf = tmp;
10010 size *= 2;
10011 }
10012 COPY_BUF(rl,buf,len,r);
10013 r = s;
10014 rl = sl;
10015 s = cur;
10016 sl = l;
10017 count++;
10018 if (count > 50) {
10019 GROW;
10020 if (ctxt->instate == XML_PARSER_EOF) {
10021 xmlFree(buf);
10022 return;
10023 }
10024 count = 0;
10025 }
10026 NEXTL(l);
10027 cur = CUR_CHAR(l);
10028 }
10029 buf[len] = 0;
10030 ctxt->instate = XML_PARSER_CONTENT;
10031 if (cur != '>') {
10032 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10033 "CData section not finished\n%.50s\n", buf);
10034 xmlFree(buf);
10035 return;
10036 }
10037 NEXTL(l);
10038
10039 /*
10040 * OK the buffer is to be consumed as cdata.
10041 */
10042 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10043 if (ctxt->sax->cdataBlock != NULL)
10044 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10045 else if (ctxt->sax->characters != NULL)
10046 ctxt->sax->characters(ctxt->userData, buf, len);
10047 }
10048 xmlFree(buf);
10049 }
10050
10051 /**
10052 * xmlParseContent:
10053 * @ctxt: an XML parser context
10054 *
10055 * Parse a content:
10056 *
10057 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10058 */
10059
10060 void
xmlParseContent(xmlParserCtxtPtr ctxt)10061 xmlParseContent(xmlParserCtxtPtr ctxt) {
10062 GROW;
10063 while ((RAW != 0) &&
10064 ((RAW != '<') || (NXT(1) != '/')) &&
10065 (ctxt->instate != XML_PARSER_EOF)) {
10066 const xmlChar *test = CUR_PTR;
10067 unsigned int cons = ctxt->input->consumed;
10068 const xmlChar *cur = ctxt->input->cur;
10069
10070 /*
10071 * First case : a Processing Instruction.
10072 */
10073 if ((*cur == '<') && (cur[1] == '?')) {
10074 xmlParsePI(ctxt);
10075 }
10076
10077 /*
10078 * Second case : a CDSection
10079 */
10080 /* 2.6.0 test was *cur not RAW */
10081 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10082 xmlParseCDSect(ctxt);
10083 }
10084
10085 /*
10086 * Third case : a comment
10087 */
10088 else if ((*cur == '<') && (NXT(1) == '!') &&
10089 (NXT(2) == '-') && (NXT(3) == '-')) {
10090 xmlParseComment(ctxt);
10091 ctxt->instate = XML_PARSER_CONTENT;
10092 }
10093
10094 /*
10095 * Fourth case : a sub-element.
10096 */
10097 else if (*cur == '<') {
10098 xmlParseElement(ctxt);
10099 }
10100
10101 /*
10102 * Fifth case : a reference. If if has not been resolved,
10103 * parsing returns it's Name, create the node
10104 */
10105
10106 else if (*cur == '&') {
10107 xmlParseReference(ctxt);
10108 }
10109
10110 /*
10111 * Last case, text. Note that References are handled directly.
10112 */
10113 else {
10114 xmlParseCharData(ctxt, 0);
10115 }
10116
10117 GROW;
10118 /*
10119 * Pop-up of finished entities.
10120 */
10121 while ((RAW == 0) && (ctxt->inputNr > 1))
10122 xmlPopInput(ctxt);
10123 SHRINK;
10124
10125 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10126 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10127 "detected an error in element content\n");
10128 xmlHaltParser(ctxt);
10129 break;
10130 }
10131 }
10132 }
10133
10134 /**
10135 * xmlParseElement:
10136 * @ctxt: an XML parser context
10137 *
10138 * parse an XML element, this is highly recursive
10139 *
10140 * [39] element ::= EmptyElemTag | STag content ETag
10141 *
10142 * [ WFC: Element Type Match ]
10143 * The Name in an element's end-tag must match the element type in the
10144 * start-tag.
10145 *
10146 */
10147
10148 void
xmlParseElement(xmlParserCtxtPtr ctxt)10149 xmlParseElement(xmlParserCtxtPtr ctxt) {
10150 const xmlChar *name;
10151 const xmlChar *prefix = NULL;
10152 const xmlChar *URI = NULL;
10153 xmlParserNodeInfo node_info;
10154 int line, tlen = 0;
10155 xmlNodePtr ret;
10156 int nsNr = ctxt->nsNr;
10157
10158 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10159 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10160 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10161 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10162 xmlParserMaxDepth);
10163 xmlHaltParser(ctxt);
10164 return;
10165 }
10166
10167 /* Capture start position */
10168 if (ctxt->record_info) {
10169 node_info.begin_pos = ctxt->input->consumed +
10170 (CUR_PTR - ctxt->input->base);
10171 node_info.begin_line = ctxt->input->line;
10172 }
10173
10174 if (ctxt->spaceNr == 0)
10175 spacePush(ctxt, -1);
10176 else if (*ctxt->space == -2)
10177 spacePush(ctxt, -1);
10178 else
10179 spacePush(ctxt, *ctxt->space);
10180
10181 line = ctxt->input->line;
10182 #ifdef LIBXML_SAX1_ENABLED
10183 if (ctxt->sax2)
10184 #endif /* LIBXML_SAX1_ENABLED */
10185 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10186 #ifdef LIBXML_SAX1_ENABLED
10187 else
10188 name = xmlParseStartTag(ctxt);
10189 #endif /* LIBXML_SAX1_ENABLED */
10190 if (ctxt->instate == XML_PARSER_EOF)
10191 return;
10192 if (name == NULL) {
10193 spacePop(ctxt);
10194 return;
10195 }
10196 namePush(ctxt, name);
10197 ret = ctxt->node;
10198
10199 #ifdef LIBXML_VALID_ENABLED
10200 /*
10201 * [ VC: Root Element Type ]
10202 * The Name in the document type declaration must match the element
10203 * type of the root element.
10204 */
10205 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10206 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10207 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10208 #endif /* LIBXML_VALID_ENABLED */
10209
10210 /*
10211 * Check for an Empty Element.
10212 */
10213 if ((RAW == '/') && (NXT(1) == '>')) {
10214 SKIP(2);
10215 if (ctxt->sax2) {
10216 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10217 (!ctxt->disableSAX))
10218 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10219 #ifdef LIBXML_SAX1_ENABLED
10220 } else {
10221 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10222 (!ctxt->disableSAX))
10223 ctxt->sax->endElement(ctxt->userData, name);
10224 #endif /* LIBXML_SAX1_ENABLED */
10225 }
10226 namePop(ctxt);
10227 spacePop(ctxt);
10228 if (nsNr != ctxt->nsNr)
10229 nsPop(ctxt, ctxt->nsNr - nsNr);
10230 if ( ret != NULL && ctxt->record_info ) {
10231 node_info.end_pos = ctxt->input->consumed +
10232 (CUR_PTR - ctxt->input->base);
10233 node_info.end_line = ctxt->input->line;
10234 node_info.node = ret;
10235 xmlParserAddNodeInfo(ctxt, &node_info);
10236 }
10237 return;
10238 }
10239 if (RAW == '>') {
10240 NEXT1;
10241 } else {
10242 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10243 "Couldn't find end of Start Tag %s line %d\n",
10244 name, line, NULL);
10245
10246 /*
10247 * end of parsing of this node.
10248 */
10249 nodePop(ctxt);
10250 namePop(ctxt);
10251 spacePop(ctxt);
10252 if (nsNr != ctxt->nsNr)
10253 nsPop(ctxt, ctxt->nsNr - nsNr);
10254
10255 /*
10256 * Capture end position and add node
10257 */
10258 if ( ret != NULL && ctxt->record_info ) {
10259 node_info.end_pos = ctxt->input->consumed +
10260 (CUR_PTR - ctxt->input->base);
10261 node_info.end_line = ctxt->input->line;
10262 node_info.node = ret;
10263 xmlParserAddNodeInfo(ctxt, &node_info);
10264 }
10265 return;
10266 }
10267
10268 /*
10269 * Parse the content of the element:
10270 */
10271 xmlParseContent(ctxt);
10272 if (ctxt->instate == XML_PARSER_EOF)
10273 return;
10274 if (!IS_BYTE_CHAR(RAW)) {
10275 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10276 "Premature end of data in tag %s line %d\n",
10277 name, line, NULL);
10278
10279 /*
10280 * end of parsing of this node.
10281 */
10282 nodePop(ctxt);
10283 namePop(ctxt);
10284 spacePop(ctxt);
10285 if (nsNr != ctxt->nsNr)
10286 nsPop(ctxt, ctxt->nsNr - nsNr);
10287 return;
10288 }
10289
10290 /*
10291 * parse the end of tag: '</' should be here.
10292 */
10293 if (ctxt->sax2) {
10294 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10295 namePop(ctxt);
10296 }
10297 #ifdef LIBXML_SAX1_ENABLED
10298 else
10299 xmlParseEndTag1(ctxt, line);
10300 #endif /* LIBXML_SAX1_ENABLED */
10301
10302 /*
10303 * Capture end position and add node
10304 */
10305 if ( ret != NULL && ctxt->record_info ) {
10306 node_info.end_pos = ctxt->input->consumed +
10307 (CUR_PTR - ctxt->input->base);
10308 node_info.end_line = ctxt->input->line;
10309 node_info.node = ret;
10310 xmlParserAddNodeInfo(ctxt, &node_info);
10311 }
10312 }
10313
10314 /**
10315 * xmlParseVersionNum:
10316 * @ctxt: an XML parser context
10317 *
10318 * parse the XML version value.
10319 *
10320 * [26] VersionNum ::= '1.' [0-9]+
10321 *
10322 * In practice allow [0-9].[0-9]+ at that level
10323 *
10324 * Returns the string giving the XML version number, or NULL
10325 */
10326 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10327 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10328 xmlChar *buf = NULL;
10329 int len = 0;
10330 int size = 10;
10331 xmlChar cur;
10332
10333 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10334 if (buf == NULL) {
10335 xmlErrMemory(ctxt, NULL);
10336 return(NULL);
10337 }
10338 cur = CUR;
10339 if (!((cur >= '0') && (cur <= '9'))) {
10340 xmlFree(buf);
10341 return(NULL);
10342 }
10343 buf[len++] = cur;
10344 NEXT;
10345 cur=CUR;
10346 if (cur != '.') {
10347 xmlFree(buf);
10348 return(NULL);
10349 }
10350 buf[len++] = cur;
10351 NEXT;
10352 cur=CUR;
10353 while ((cur >= '0') && (cur <= '9')) {
10354 if (len + 1 >= size) {
10355 xmlChar *tmp;
10356
10357 size *= 2;
10358 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10359 if (tmp == NULL) {
10360 xmlFree(buf);
10361 xmlErrMemory(ctxt, NULL);
10362 return(NULL);
10363 }
10364 buf = tmp;
10365 }
10366 buf[len++] = cur;
10367 NEXT;
10368 cur=CUR;
10369 }
10370 buf[len] = 0;
10371 return(buf);
10372 }
10373
10374 /**
10375 * xmlParseVersionInfo:
10376 * @ctxt: an XML parser context
10377 *
10378 * parse the XML version.
10379 *
10380 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10381 *
10382 * [25] Eq ::= S? '=' S?
10383 *
10384 * Returns the version string, e.g. "1.0"
10385 */
10386
10387 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10388 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10389 xmlChar *version = NULL;
10390
10391 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10392 SKIP(7);
10393 SKIP_BLANKS;
10394 if (RAW != '=') {
10395 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10396 return(NULL);
10397 }
10398 NEXT;
10399 SKIP_BLANKS;
10400 if (RAW == '"') {
10401 NEXT;
10402 version = xmlParseVersionNum(ctxt);
10403 if (RAW != '"') {
10404 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10405 } else
10406 NEXT;
10407 } else if (RAW == '\''){
10408 NEXT;
10409 version = xmlParseVersionNum(ctxt);
10410 if (RAW != '\'') {
10411 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10412 } else
10413 NEXT;
10414 } else {
10415 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10416 }
10417 }
10418 return(version);
10419 }
10420
10421 /**
10422 * xmlParseEncName:
10423 * @ctxt: an XML parser context
10424 *
10425 * parse the XML encoding name
10426 *
10427 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10428 *
10429 * Returns the encoding name value or NULL
10430 */
10431 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10432 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10433 xmlChar *buf = NULL;
10434 int len = 0;
10435 int size = 10;
10436 xmlChar cur;
10437
10438 cur = CUR;
10439 if (((cur >= 'a') && (cur <= 'z')) ||
10440 ((cur >= 'A') && (cur <= 'Z'))) {
10441 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10442 if (buf == NULL) {
10443 xmlErrMemory(ctxt, NULL);
10444 return(NULL);
10445 }
10446
10447 buf[len++] = cur;
10448 NEXT;
10449 cur = CUR;
10450 while (((cur >= 'a') && (cur <= 'z')) ||
10451 ((cur >= 'A') && (cur <= 'Z')) ||
10452 ((cur >= '0') && (cur <= '9')) ||
10453 (cur == '.') || (cur == '_') ||
10454 (cur == '-')) {
10455 if (len + 1 >= size) {
10456 xmlChar *tmp;
10457
10458 size *= 2;
10459 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10460 if (tmp == NULL) {
10461 xmlErrMemory(ctxt, NULL);
10462 xmlFree(buf);
10463 return(NULL);
10464 }
10465 buf = tmp;
10466 }
10467 buf[len++] = cur;
10468 NEXT;
10469 cur = CUR;
10470 if (cur == 0) {
10471 SHRINK;
10472 GROW;
10473 cur = CUR;
10474 }
10475 }
10476 buf[len] = 0;
10477 } else {
10478 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10479 }
10480 return(buf);
10481 }
10482
10483 /**
10484 * xmlParseEncodingDecl:
10485 * @ctxt: an XML parser context
10486 *
10487 * parse the XML encoding declaration
10488 *
10489 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10490 *
10491 * this setups the conversion filters.
10492 *
10493 * Returns the encoding value or NULL
10494 */
10495
10496 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10497 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10498 xmlChar *encoding = NULL;
10499
10500 SKIP_BLANKS;
10501 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10502 SKIP(8);
10503 SKIP_BLANKS;
10504 if (RAW != '=') {
10505 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10506 return(NULL);
10507 }
10508 NEXT;
10509 SKIP_BLANKS;
10510 if (RAW == '"') {
10511 NEXT;
10512 encoding = xmlParseEncName(ctxt);
10513 if (RAW != '"') {
10514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10515 xmlFree((xmlChar *) encoding);
10516 return(NULL);
10517 } else
10518 NEXT;
10519 } else if (RAW == '\''){
10520 NEXT;
10521 encoding = xmlParseEncName(ctxt);
10522 if (RAW != '\'') {
10523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10524 xmlFree((xmlChar *) encoding);
10525 return(NULL);
10526 } else
10527 NEXT;
10528 } else {
10529 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10530 }
10531
10532 /*
10533 * Non standard parsing, allowing the user to ignore encoding
10534 */
10535 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10536 xmlFree((xmlChar *) encoding);
10537 return(NULL);
10538 }
10539
10540 /*
10541 * UTF-16 encoding stwich has already taken place at this stage,
10542 * more over the little-endian/big-endian selection is already done
10543 */
10544 if ((encoding != NULL) &&
10545 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10546 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10547 /*
10548 * If no encoding was passed to the parser, that we are
10549 * using UTF-16 and no decoder is present i.e. the
10550 * document is apparently UTF-8 compatible, then raise an
10551 * encoding mismatch fatal error
10552 */
10553 if ((ctxt->encoding == NULL) &&
10554 (ctxt->input->buf != NULL) &&
10555 (ctxt->input->buf->encoder == NULL)) {
10556 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10557 "Document labelled UTF-16 but has UTF-8 content\n");
10558 }
10559 if (ctxt->encoding != NULL)
10560 xmlFree((xmlChar *) ctxt->encoding);
10561 ctxt->encoding = encoding;
10562 }
10563 /*
10564 * UTF-8 encoding is handled natively
10565 */
10566 else if ((encoding != NULL) &&
10567 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10568 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10569 if (ctxt->encoding != NULL)
10570 xmlFree((xmlChar *) ctxt->encoding);
10571 ctxt->encoding = encoding;
10572 }
10573 else if (encoding != NULL) {
10574 xmlCharEncodingHandlerPtr handler;
10575
10576 if (ctxt->input->encoding != NULL)
10577 xmlFree((xmlChar *) ctxt->input->encoding);
10578 ctxt->input->encoding = encoding;
10579
10580 handler = xmlFindCharEncodingHandler((const char *) encoding);
10581 if (handler != NULL) {
10582 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10583 /* failed to convert */
10584 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10585 return(NULL);
10586 }
10587 } else {
10588 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10589 "Unsupported encoding %s\n", encoding);
10590 return(NULL);
10591 }
10592 }
10593 }
10594 return(encoding);
10595 }
10596
10597 /**
10598 * xmlParseSDDecl:
10599 * @ctxt: an XML parser context
10600 *
10601 * parse the XML standalone declaration
10602 *
10603 * [32] SDDecl ::= S 'standalone' Eq
10604 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10605 *
10606 * [ VC: Standalone Document Declaration ]
10607 * TODO The standalone document declaration must have the value "no"
10608 * if any external markup declarations contain declarations of:
10609 * - attributes with default values, if elements to which these
10610 * attributes apply appear in the document without specifications
10611 * of values for these attributes, or
10612 * - entities (other than amp, lt, gt, apos, quot), if references
10613 * to those entities appear in the document, or
10614 * - attributes with values subject to normalization, where the
10615 * attribute appears in the document with a value which will change
10616 * as a result of normalization, or
10617 * - element types with element content, if white space occurs directly
10618 * within any instance of those types.
10619 *
10620 * Returns:
10621 * 1 if standalone="yes"
10622 * 0 if standalone="no"
10623 * -2 if standalone attribute is missing or invalid
10624 * (A standalone value of -2 means that the XML declaration was found,
10625 * but no value was specified for the standalone attribute).
10626 */
10627
10628 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10629 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10630 int standalone = -2;
10631
10632 SKIP_BLANKS;
10633 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10634 SKIP(10);
10635 SKIP_BLANKS;
10636 if (RAW != '=') {
10637 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10638 return(standalone);
10639 }
10640 NEXT;
10641 SKIP_BLANKS;
10642 if (RAW == '\''){
10643 NEXT;
10644 if ((RAW == 'n') && (NXT(1) == 'o')) {
10645 standalone = 0;
10646 SKIP(2);
10647 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10648 (NXT(2) == 's')) {
10649 standalone = 1;
10650 SKIP(3);
10651 } else {
10652 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10653 }
10654 if (RAW != '\'') {
10655 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10656 } else
10657 NEXT;
10658 } else if (RAW == '"'){
10659 NEXT;
10660 if ((RAW == 'n') && (NXT(1) == 'o')) {
10661 standalone = 0;
10662 SKIP(2);
10663 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10664 (NXT(2) == 's')) {
10665 standalone = 1;
10666 SKIP(3);
10667 } else {
10668 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10669 }
10670 if (RAW != '"') {
10671 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10672 } else
10673 NEXT;
10674 } else {
10675 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10676 }
10677 }
10678 return(standalone);
10679 }
10680
10681 /**
10682 * xmlParseXMLDecl:
10683 * @ctxt: an XML parser context
10684 *
10685 * parse an XML declaration header
10686 *
10687 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10688 */
10689
10690 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10691 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10692 xmlChar *version;
10693
10694 /*
10695 * This value for standalone indicates that the document has an
10696 * XML declaration but it does not have a standalone attribute.
10697 * It will be overwritten later if a standalone attribute is found.
10698 */
10699 ctxt->input->standalone = -2;
10700
10701 /*
10702 * We know that '<?xml' is here.
10703 */
10704 SKIP(5);
10705
10706 if (!IS_BLANK_CH(RAW)) {
10707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10708 "Blank needed after '<?xml'\n");
10709 }
10710 SKIP_BLANKS;
10711
10712 /*
10713 * We must have the VersionInfo here.
10714 */
10715 version = xmlParseVersionInfo(ctxt);
10716 if (version == NULL) {
10717 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10718 } else {
10719 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10720 /*
10721 * Changed here for XML-1.0 5th edition
10722 */
10723 if (ctxt->options & XML_PARSE_OLD10) {
10724 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10725 "Unsupported version '%s'\n",
10726 version);
10727 } else {
10728 if ((version[0] == '1') && ((version[1] == '.'))) {
10729 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10730 "Unsupported version '%s'\n",
10731 version, NULL);
10732 } else {
10733 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10734 "Unsupported version '%s'\n",
10735 version);
10736 }
10737 }
10738 }
10739 if (ctxt->version != NULL)
10740 xmlFree((void *) ctxt->version);
10741 ctxt->version = version;
10742 }
10743
10744 /*
10745 * We may have the encoding declaration
10746 */
10747 if (!IS_BLANK_CH(RAW)) {
10748 if ((RAW == '?') && (NXT(1) == '>')) {
10749 SKIP(2);
10750 return;
10751 }
10752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10753 }
10754 xmlParseEncodingDecl(ctxt);
10755 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10756 (ctxt->instate == XML_PARSER_EOF)) {
10757 /*
10758 * The XML REC instructs us to stop parsing right here
10759 */
10760 return;
10761 }
10762
10763 /*
10764 * We may have the standalone status.
10765 */
10766 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10767 if ((RAW == '?') && (NXT(1) == '>')) {
10768 SKIP(2);
10769 return;
10770 }
10771 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10772 }
10773
10774 /*
10775 * We can grow the input buffer freely at that point
10776 */
10777 GROW;
10778
10779 SKIP_BLANKS;
10780 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10781
10782 SKIP_BLANKS;
10783 if ((RAW == '?') && (NXT(1) == '>')) {
10784 SKIP(2);
10785 } else if (RAW == '>') {
10786 /* Deprecated old WD ... */
10787 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10788 NEXT;
10789 } else {
10790 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10791 MOVETO_ENDTAG(CUR_PTR);
10792 NEXT;
10793 }
10794 }
10795
10796 /**
10797 * xmlParseMisc:
10798 * @ctxt: an XML parser context
10799 *
10800 * parse an XML Misc* optional field.
10801 *
10802 * [27] Misc ::= Comment | PI | S
10803 */
10804
10805 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10806 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10807 while ((ctxt->instate != XML_PARSER_EOF) &&
10808 (((RAW == '<') && (NXT(1) == '?')) ||
10809 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10810 IS_BLANK_CH(CUR))) {
10811 if ((RAW == '<') && (NXT(1) == '?')) {
10812 xmlParsePI(ctxt);
10813 } else if (IS_BLANK_CH(CUR)) {
10814 NEXT;
10815 } else
10816 xmlParseComment(ctxt);
10817 }
10818 }
10819
10820 /**
10821 * xmlParseDocument:
10822 * @ctxt: an XML parser context
10823 *
10824 * parse an XML document (and build a tree if using the standard SAX
10825 * interface).
10826 *
10827 * [1] document ::= prolog element Misc*
10828 *
10829 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10830 *
10831 * Returns 0, -1 in case of error. the parser context is augmented
10832 * as a result of the parsing.
10833 */
10834
10835 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10836 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10837 xmlChar start[4];
10838 xmlCharEncoding enc;
10839
10840 xmlInitParser();
10841
10842 if ((ctxt == NULL) || (ctxt->input == NULL))
10843 return(-1);
10844
10845 GROW;
10846
10847 /*
10848 * SAX: detecting the level.
10849 */
10850 xmlDetectSAX2(ctxt);
10851
10852 /*
10853 * SAX: beginning of the document processing.
10854 */
10855 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10856 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10857 if (ctxt->instate == XML_PARSER_EOF)
10858 return(-1);
10859
10860 if ((ctxt->encoding == NULL) &&
10861 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10862 /*
10863 * Get the 4 first bytes and decode the charset
10864 * if enc != XML_CHAR_ENCODING_NONE
10865 * plug some encoding conversion routines.
10866 */
10867 start[0] = RAW;
10868 start[1] = NXT(1);
10869 start[2] = NXT(2);
10870 start[3] = NXT(3);
10871 enc = xmlDetectCharEncoding(&start[0], 4);
10872 if (enc != XML_CHAR_ENCODING_NONE) {
10873 xmlSwitchEncoding(ctxt, enc);
10874 }
10875 }
10876
10877
10878 if (CUR == 0) {
10879 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10880 return(-1);
10881 }
10882
10883 /*
10884 * Check for the XMLDecl in the Prolog.
10885 * do not GROW here to avoid the detected encoder to decode more
10886 * than just the first line, unless the amount of data is really
10887 * too small to hold "<?xml version="1.0" encoding="foo"
10888 */
10889 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10890 GROW;
10891 }
10892 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10893
10894 /*
10895 * Note that we will switch encoding on the fly.
10896 */
10897 xmlParseXMLDecl(ctxt);
10898 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10899 (ctxt->instate == XML_PARSER_EOF)) {
10900 /*
10901 * The XML REC instructs us to stop parsing right here
10902 */
10903 return(-1);
10904 }
10905 ctxt->standalone = ctxt->input->standalone;
10906 SKIP_BLANKS;
10907 } else {
10908 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10909 }
10910 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10911 ctxt->sax->startDocument(ctxt->userData);
10912 if (ctxt->instate == XML_PARSER_EOF)
10913 return(-1);
10914 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10915 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10916 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10917 }
10918
10919 /*
10920 * The Misc part of the Prolog
10921 */
10922 GROW;
10923 xmlParseMisc(ctxt);
10924
10925 /*
10926 * Then possibly doc type declaration(s) and more Misc
10927 * (doctypedecl Misc*)?
10928 */
10929 GROW;
10930 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10931
10932 ctxt->inSubset = 1;
10933 xmlParseDocTypeDecl(ctxt);
10934 if (RAW == '[') {
10935 ctxt->instate = XML_PARSER_DTD;
10936 xmlParseInternalSubset(ctxt);
10937 if (ctxt->instate == XML_PARSER_EOF)
10938 return(-1);
10939 }
10940
10941 /*
10942 * Create and update the external subset.
10943 */
10944 ctxt->inSubset = 2;
10945 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10946 (!ctxt->disableSAX))
10947 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10948 ctxt->extSubSystem, ctxt->extSubURI);
10949 if (ctxt->instate == XML_PARSER_EOF)
10950 return(-1);
10951 ctxt->inSubset = 0;
10952
10953 xmlCleanSpecialAttr(ctxt);
10954
10955 ctxt->instate = XML_PARSER_PROLOG;
10956 xmlParseMisc(ctxt);
10957 }
10958
10959 /*
10960 * Time to start parsing the tree itself
10961 */
10962 GROW;
10963 if (RAW != '<') {
10964 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10965 "Start tag expected, '<' not found\n");
10966 } else {
10967 ctxt->instate = XML_PARSER_CONTENT;
10968 xmlParseElement(ctxt);
10969 ctxt->instate = XML_PARSER_EPILOG;
10970
10971
10972 /*
10973 * The Misc part at the end
10974 */
10975 xmlParseMisc(ctxt);
10976
10977 if (RAW != 0) {
10978 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10979 }
10980 ctxt->instate = XML_PARSER_EOF;
10981 }
10982
10983 /*
10984 * SAX: end of the document processing.
10985 */
10986 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10987 ctxt->sax->endDocument(ctxt->userData);
10988
10989 /*
10990 * Remove locally kept entity definitions if the tree was not built
10991 */
10992 if ((ctxt->myDoc != NULL) &&
10993 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10994 xmlFreeDoc(ctxt->myDoc);
10995 ctxt->myDoc = NULL;
10996 }
10997
10998 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10999 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
11000 if (ctxt->valid)
11001 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
11002 if (ctxt->nsWellFormed)
11003 ctxt->myDoc->properties |= XML_DOC_NSVALID;
11004 if (ctxt->options & XML_PARSE_OLD10)
11005 ctxt->myDoc->properties |= XML_DOC_OLD10;
11006 }
11007 if (! ctxt->wellFormed) {
11008 ctxt->valid = 0;
11009 return(-1);
11010 }
11011 return(0);
11012 }
11013
11014 /**
11015 * xmlParseExtParsedEnt:
11016 * @ctxt: an XML parser context
11017 *
11018 * parse a general parsed entity
11019 * An external general parsed entity is well-formed if it matches the
11020 * production labeled extParsedEnt.
11021 *
11022 * [78] extParsedEnt ::= TextDecl? content
11023 *
11024 * Returns 0, -1 in case of error. the parser context is augmented
11025 * as a result of the parsing.
11026 */
11027
11028 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)11029 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11030 xmlChar start[4];
11031 xmlCharEncoding enc;
11032
11033 if ((ctxt == NULL) || (ctxt->input == NULL))
11034 return(-1);
11035
11036 xmlDefaultSAXHandlerInit();
11037
11038 xmlDetectSAX2(ctxt);
11039
11040 GROW;
11041
11042 /*
11043 * SAX: beginning of the document processing.
11044 */
11045 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11046 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11047
11048 /*
11049 * Get the 4 first bytes and decode the charset
11050 * if enc != XML_CHAR_ENCODING_NONE
11051 * plug some encoding conversion routines.
11052 */
11053 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11054 start[0] = RAW;
11055 start[1] = NXT(1);
11056 start[2] = NXT(2);
11057 start[3] = NXT(3);
11058 enc = xmlDetectCharEncoding(start, 4);
11059 if (enc != XML_CHAR_ENCODING_NONE) {
11060 xmlSwitchEncoding(ctxt, enc);
11061 }
11062 }
11063
11064
11065 if (CUR == 0) {
11066 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11067 }
11068
11069 /*
11070 * Check for the XMLDecl in the Prolog.
11071 */
11072 GROW;
11073 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11074
11075 /*
11076 * Note that we will switch encoding on the fly.
11077 */
11078 xmlParseXMLDecl(ctxt);
11079 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11080 /*
11081 * The XML REC instructs us to stop parsing right here
11082 */
11083 return(-1);
11084 }
11085 SKIP_BLANKS;
11086 } else {
11087 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11088 }
11089 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11090 ctxt->sax->startDocument(ctxt->userData);
11091 if (ctxt->instate == XML_PARSER_EOF)
11092 return(-1);
11093
11094 /*
11095 * Doing validity checking on chunk doesn't make sense
11096 */
11097 ctxt->instate = XML_PARSER_CONTENT;
11098 ctxt->validate = 0;
11099 ctxt->loadsubset = 0;
11100 ctxt->depth = 0;
11101
11102 xmlParseContent(ctxt);
11103 if (ctxt->instate == XML_PARSER_EOF)
11104 return(-1);
11105
11106 if ((RAW == '<') && (NXT(1) == '/')) {
11107 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11108 } else if (RAW != 0) {
11109 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11110 }
11111
11112 /*
11113 * SAX: end of the document processing.
11114 */
11115 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11116 ctxt->sax->endDocument(ctxt->userData);
11117
11118 if (! ctxt->wellFormed) return(-1);
11119 return(0);
11120 }
11121
11122 #ifdef LIBXML_PUSH_ENABLED
11123 /************************************************************************
11124 * *
11125 * Progressive parsing interfaces *
11126 * *
11127 ************************************************************************/
11128
11129 /**
11130 * xmlParseLookupSequence:
11131 * @ctxt: an XML parser context
11132 * @first: the first char to lookup
11133 * @next: the next char to lookup or zero
11134 * @third: the next char to lookup or zero
11135 *
11136 * Try to find if a sequence (first, next, third) or just (first next) or
11137 * (first) is available in the input stream.
11138 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11139 * to avoid rescanning sequences of bytes, it DOES change the state of the
11140 * parser, do not use liberally.
11141 *
11142 * Returns the index to the current parsing point if the full sequence
11143 * is available, -1 otherwise.
11144 */
11145 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11146 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11147 xmlChar next, xmlChar third) {
11148 int base, len;
11149 xmlParserInputPtr in;
11150 const xmlChar *buf;
11151
11152 in = ctxt->input;
11153 if (in == NULL) return(-1);
11154 base = in->cur - in->base;
11155 if (base < 0) return(-1);
11156 if (ctxt->checkIndex > base)
11157 base = ctxt->checkIndex;
11158 if (in->buf == NULL) {
11159 buf = in->base;
11160 len = in->length;
11161 } else {
11162 buf = xmlBufContent(in->buf->buffer);
11163 len = xmlBufUse(in->buf->buffer);
11164 }
11165 /* take into account the sequence length */
11166 if (third) len -= 2;
11167 else if (next) len --;
11168 for (;base < len;base++) {
11169 if (buf[base] == first) {
11170 if (third != 0) {
11171 if ((buf[base + 1] != next) ||
11172 (buf[base + 2] != third)) continue;
11173 } else if (next != 0) {
11174 if (buf[base + 1] != next) continue;
11175 }
11176 ctxt->checkIndex = 0;
11177 #ifdef DEBUG_PUSH
11178 if (next == 0)
11179 xmlGenericError(xmlGenericErrorContext,
11180 "PP: lookup '%c' found at %d\n",
11181 first, base);
11182 else if (third == 0)
11183 xmlGenericError(xmlGenericErrorContext,
11184 "PP: lookup '%c%c' found at %d\n",
11185 first, next, base);
11186 else
11187 xmlGenericError(xmlGenericErrorContext,
11188 "PP: lookup '%c%c%c' found at %d\n",
11189 first, next, third, base);
11190 #endif
11191 return(base - (in->cur - in->base));
11192 }
11193 }
11194 ctxt->checkIndex = base;
11195 #ifdef DEBUG_PUSH
11196 if (next == 0)
11197 xmlGenericError(xmlGenericErrorContext,
11198 "PP: lookup '%c' failed\n", first);
11199 else if (third == 0)
11200 xmlGenericError(xmlGenericErrorContext,
11201 "PP: lookup '%c%c' failed\n", first, next);
11202 else
11203 xmlGenericError(xmlGenericErrorContext,
11204 "PP: lookup '%c%c%c' failed\n", first, next, third);
11205 #endif
11206 return(-1);
11207 }
11208
11209 /**
11210 * xmlParseGetLasts:
11211 * @ctxt: an XML parser context
11212 * @lastlt: pointer to store the last '<' from the input
11213 * @lastgt: pointer to store the last '>' from the input
11214 *
11215 * Lookup the last < and > in the current chunk
11216 */
11217 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11218 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11219 const xmlChar **lastgt) {
11220 const xmlChar *tmp;
11221
11222 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11223 xmlGenericError(xmlGenericErrorContext,
11224 "Internal error: xmlParseGetLasts\n");
11225 return;
11226 }
11227 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11228 tmp = ctxt->input->end;
11229 tmp--;
11230 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11231 if (tmp < ctxt->input->base) {
11232 *lastlt = NULL;
11233 *lastgt = NULL;
11234 } else {
11235 *lastlt = tmp;
11236 tmp++;
11237 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11238 if (*tmp == '\'') {
11239 tmp++;
11240 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11241 if (tmp < ctxt->input->end) tmp++;
11242 } else if (*tmp == '"') {
11243 tmp++;
11244 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11245 if (tmp < ctxt->input->end) tmp++;
11246 } else
11247 tmp++;
11248 }
11249 if (tmp < ctxt->input->end)
11250 *lastgt = tmp;
11251 else {
11252 tmp = *lastlt;
11253 tmp--;
11254 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11255 if (tmp >= ctxt->input->base)
11256 *lastgt = tmp;
11257 else
11258 *lastgt = NULL;
11259 }
11260 }
11261 } else {
11262 *lastlt = NULL;
11263 *lastgt = NULL;
11264 }
11265 }
11266 /**
11267 * xmlCheckCdataPush:
11268 * @cur: pointer to the block of characters
11269 * @len: length of the block in bytes
11270 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11271 *
11272 * Check that the block of characters is okay as SCdata content [20]
11273 *
11274 * Returns the number of bytes to pass if okay, a negative index where an
11275 * UTF-8 error occured otherwise
11276 */
11277 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11278 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11279 int ix;
11280 unsigned char c;
11281 int codepoint;
11282
11283 if ((utf == NULL) || (len <= 0))
11284 return(0);
11285
11286 for (ix = 0; ix < len;) { /* string is 0-terminated */
11287 c = utf[ix];
11288 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11289 if (c >= 0x20)
11290 ix++;
11291 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11292 ix++;
11293 else
11294 return(-ix);
11295 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11296 if (ix + 2 > len) return(complete ? -ix : ix);
11297 if ((utf[ix+1] & 0xc0 ) != 0x80)
11298 return(-ix);
11299 codepoint = (utf[ix] & 0x1f) << 6;
11300 codepoint |= utf[ix+1] & 0x3f;
11301 if (!xmlIsCharQ(codepoint))
11302 return(-ix);
11303 ix += 2;
11304 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11305 if (ix + 3 > len) return(complete ? -ix : ix);
11306 if (((utf[ix+1] & 0xc0) != 0x80) ||
11307 ((utf[ix+2] & 0xc0) != 0x80))
11308 return(-ix);
11309 codepoint = (utf[ix] & 0xf) << 12;
11310 codepoint |= (utf[ix+1] & 0x3f) << 6;
11311 codepoint |= utf[ix+2] & 0x3f;
11312 if (!xmlIsCharQ(codepoint))
11313 return(-ix);
11314 ix += 3;
11315 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11316 if (ix + 4 > len) return(complete ? -ix : ix);
11317 if (((utf[ix+1] & 0xc0) != 0x80) ||
11318 ((utf[ix+2] & 0xc0) != 0x80) ||
11319 ((utf[ix+3] & 0xc0) != 0x80))
11320 return(-ix);
11321 codepoint = (utf[ix] & 0x7) << 18;
11322 codepoint |= (utf[ix+1] & 0x3f) << 12;
11323 codepoint |= (utf[ix+2] & 0x3f) << 6;
11324 codepoint |= utf[ix+3] & 0x3f;
11325 if (!xmlIsCharQ(codepoint))
11326 return(-ix);
11327 ix += 4;
11328 } else /* unknown encoding */
11329 return(-ix);
11330 }
11331 return(ix);
11332 }
11333
11334 /**
11335 * xmlParseTryOrFinish:
11336 * @ctxt: an XML parser context
11337 * @terminate: last chunk indicator
11338 *
11339 * Try to progress on parsing
11340 *
11341 * Returns zero if no parsing was possible
11342 */
11343 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11344 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11345 int ret = 0;
11346 int avail, tlen;
11347 xmlChar cur, next;
11348 const xmlChar *lastlt, *lastgt;
11349
11350 if (ctxt->input == NULL)
11351 return(0);
11352
11353 #ifdef DEBUG_PUSH
11354 switch (ctxt->instate) {
11355 case XML_PARSER_EOF:
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: try EOF\n"); break;
11358 case XML_PARSER_START:
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: try START\n"); break;
11361 case XML_PARSER_MISC:
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: try MISC\n");break;
11364 case XML_PARSER_COMMENT:
11365 xmlGenericError(xmlGenericErrorContext,
11366 "PP: try COMMENT\n");break;
11367 case XML_PARSER_PROLOG:
11368 xmlGenericError(xmlGenericErrorContext,
11369 "PP: try PROLOG\n");break;
11370 case XML_PARSER_START_TAG:
11371 xmlGenericError(xmlGenericErrorContext,
11372 "PP: try START_TAG\n");break;
11373 case XML_PARSER_CONTENT:
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: try CONTENT\n");break;
11376 case XML_PARSER_CDATA_SECTION:
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: try CDATA_SECTION\n");break;
11379 case XML_PARSER_END_TAG:
11380 xmlGenericError(xmlGenericErrorContext,
11381 "PP: try END_TAG\n");break;
11382 case XML_PARSER_ENTITY_DECL:
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: try ENTITY_DECL\n");break;
11385 case XML_PARSER_ENTITY_VALUE:
11386 xmlGenericError(xmlGenericErrorContext,
11387 "PP: try ENTITY_VALUE\n");break;
11388 case XML_PARSER_ATTRIBUTE_VALUE:
11389 xmlGenericError(xmlGenericErrorContext,
11390 "PP: try ATTRIBUTE_VALUE\n");break;
11391 case XML_PARSER_DTD:
11392 xmlGenericError(xmlGenericErrorContext,
11393 "PP: try DTD\n");break;
11394 case XML_PARSER_EPILOG:
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: try EPILOG\n");break;
11397 case XML_PARSER_PI:
11398 xmlGenericError(xmlGenericErrorContext,
11399 "PP: try PI\n");break;
11400 case XML_PARSER_IGNORE:
11401 xmlGenericError(xmlGenericErrorContext,
11402 "PP: try IGNORE\n");break;
11403 }
11404 #endif
11405
11406 if ((ctxt->input != NULL) &&
11407 (ctxt->input->cur - ctxt->input->base > 4096)) {
11408 xmlSHRINK(ctxt);
11409 ctxt->checkIndex = 0;
11410 }
11411 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11412
11413 while (ctxt->instate != XML_PARSER_EOF) {
11414 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11415 return(0);
11416
11417
11418 /*
11419 * Pop-up of finished entities.
11420 */
11421 while ((RAW == 0) && (ctxt->inputNr > 1))
11422 xmlPopInput(ctxt);
11423
11424 if (ctxt->input == NULL) break;
11425 if (ctxt->input->buf == NULL)
11426 avail = ctxt->input->length -
11427 (ctxt->input->cur - ctxt->input->base);
11428 else {
11429 /*
11430 * If we are operating on converted input, try to flush
             * remaining chars to avoid them stalling in the non-converted
11432 * buffer. But do not do this in document start where
11433 * encoding="..." may not have been read and we work on a
11434 * guessed encoding.
11435 */
11436 if ((ctxt->instate != XML_PARSER_START) &&
11437 (ctxt->input->buf->raw != NULL) &&
11438 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11439 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11440 ctxt->input);
11441 size_t current = ctxt->input->cur - ctxt->input->base;
11442
11443 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11444 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11445 base, current);
11446 }
11447 avail = xmlBufUse(ctxt->input->buf->buffer) -
11448 (ctxt->input->cur - ctxt->input->base);
11449 }
11450 if (avail < 1)
11451 goto done;
11452 switch (ctxt->instate) {
11453 case XML_PARSER_EOF:
11454 /*
11455 * Document parsing is done !
11456 */
11457 goto done;
11458 case XML_PARSER_START:
11459 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11460 xmlChar start[4];
11461 xmlCharEncoding enc;
11462
11463 /*
11464 * Very first chars read from the document flow.
11465 */
11466 if (avail < 4)
11467 goto done;
11468
11469 /*
11470 * Get the 4 first bytes and decode the charset
11471 * if enc != XML_CHAR_ENCODING_NONE
11472 * plug some encoding conversion routines,
11473 * else xmlSwitchEncoding will set to (default)
11474 * UTF8.
11475 */
11476 start[0] = RAW;
11477 start[1] = NXT(1);
11478 start[2] = NXT(2);
11479 start[3] = NXT(3);
11480 enc = xmlDetectCharEncoding(start, 4);
11481 xmlSwitchEncoding(ctxt, enc);
11482 break;
11483 }
11484
11485 if (avail < 2)
11486 goto done;
11487 cur = ctxt->input->cur[0];
11488 next = ctxt->input->cur[1];
11489 if (cur == 0) {
11490 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11491 ctxt->sax->setDocumentLocator(ctxt->userData,
11492 &xmlDefaultSAXLocator);
11493 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11494 xmlHaltParser(ctxt);
11495 #ifdef DEBUG_PUSH
11496 xmlGenericError(xmlGenericErrorContext,
11497 "PP: entering EOF\n");
11498 #endif
11499 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11500 ctxt->sax->endDocument(ctxt->userData);
11501 goto done;
11502 }
11503 if ((cur == '<') && (next == '?')) {
11504 /* PI or XML decl */
11505 if (avail < 5) return(ret);
11506 if ((!terminate) &&
11507 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11508 return(ret);
11509 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11510 ctxt->sax->setDocumentLocator(ctxt->userData,
11511 &xmlDefaultSAXLocator);
11512 if ((ctxt->input->cur[2] == 'x') &&
11513 (ctxt->input->cur[3] == 'm') &&
11514 (ctxt->input->cur[4] == 'l') &&
11515 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11516 ret += 5;
11517 #ifdef DEBUG_PUSH
11518 xmlGenericError(xmlGenericErrorContext,
11519 "PP: Parsing XML Decl\n");
11520 #endif
11521 xmlParseXMLDecl(ctxt);
11522 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11523 /*
11524 * The XML REC instructs us to stop parsing right
11525 * here
11526 */
11527 xmlHaltParser(ctxt);
11528 return(0);
11529 }
11530 ctxt->standalone = ctxt->input->standalone;
11531 if ((ctxt->encoding == NULL) &&
11532 (ctxt->input->encoding != NULL))
11533 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11534 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11535 (!ctxt->disableSAX))
11536 ctxt->sax->startDocument(ctxt->userData);
11537 ctxt->instate = XML_PARSER_MISC;
11538 #ifdef DEBUG_PUSH
11539 xmlGenericError(xmlGenericErrorContext,
11540 "PP: entering MISC\n");
11541 #endif
11542 } else {
11543 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11544 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11545 (!ctxt->disableSAX))
11546 ctxt->sax->startDocument(ctxt->userData);
11547 ctxt->instate = XML_PARSER_MISC;
11548 #ifdef DEBUG_PUSH
11549 xmlGenericError(xmlGenericErrorContext,
11550 "PP: entering MISC\n");
11551 #endif
11552 }
11553 } else {
11554 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11555 ctxt->sax->setDocumentLocator(ctxt->userData,
11556 &xmlDefaultSAXLocator);
11557 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11558 if (ctxt->version == NULL) {
11559 xmlErrMemory(ctxt, NULL);
11560 break;
11561 }
11562 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11563 (!ctxt->disableSAX))
11564 ctxt->sax->startDocument(ctxt->userData);
11565 ctxt->instate = XML_PARSER_MISC;
11566 #ifdef DEBUG_PUSH
11567 xmlGenericError(xmlGenericErrorContext,
11568 "PP: entering MISC\n");
11569 #endif
11570 }
11571 break;
11572 case XML_PARSER_START_TAG: {
11573 const xmlChar *name;
11574 const xmlChar *prefix = NULL;
11575 const xmlChar *URI = NULL;
11576 int nsNr = ctxt->nsNr;
11577
11578 if ((avail < 2) && (ctxt->inputNr == 1))
11579 goto done;
11580 cur = ctxt->input->cur[0];
11581 if (cur != '<') {
11582 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11583 xmlHaltParser(ctxt);
11584 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11585 ctxt->sax->endDocument(ctxt->userData);
11586 goto done;
11587 }
11588 if (!terminate) {
11589 if (ctxt->progressive) {
11590 /* > can be found unescaped in attribute values */
11591 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11592 goto done;
11593 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11594 goto done;
11595 }
11596 }
11597 if (ctxt->spaceNr == 0)
11598 spacePush(ctxt, -1);
11599 else if (*ctxt->space == -2)
11600 spacePush(ctxt, -1);
11601 else
11602 spacePush(ctxt, *ctxt->space);
11603 #ifdef LIBXML_SAX1_ENABLED
11604 if (ctxt->sax2)
11605 #endif /* LIBXML_SAX1_ENABLED */
11606 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11607 #ifdef LIBXML_SAX1_ENABLED
11608 else
11609 name = xmlParseStartTag(ctxt);
11610 #endif /* LIBXML_SAX1_ENABLED */
11611 if (ctxt->instate == XML_PARSER_EOF)
11612 goto done;
11613 if (name == NULL) {
11614 spacePop(ctxt);
11615 xmlHaltParser(ctxt);
11616 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11617 ctxt->sax->endDocument(ctxt->userData);
11618 goto done;
11619 }
11620 #ifdef LIBXML_VALID_ENABLED
11621 /*
11622 * [ VC: Root Element Type ]
11623 * The Name in the document type declaration must match
11624 * the element type of the root element.
11625 */
11626 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11627 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11628 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11629 #endif /* LIBXML_VALID_ENABLED */
11630
11631 /*
11632 * Check for an Empty Element.
11633 */
11634 if ((RAW == '/') && (NXT(1) == '>')) {
11635 SKIP(2);
11636
11637 if (ctxt->sax2) {
11638 if ((ctxt->sax != NULL) &&
11639 (ctxt->sax->endElementNs != NULL) &&
11640 (!ctxt->disableSAX))
11641 ctxt->sax->endElementNs(ctxt->userData, name,
11642 prefix, URI);
11643 if (ctxt->nsNr - nsNr > 0)
11644 nsPop(ctxt, ctxt->nsNr - nsNr);
11645 #ifdef LIBXML_SAX1_ENABLED
11646 } else {
11647 if ((ctxt->sax != NULL) &&
11648 (ctxt->sax->endElement != NULL) &&
11649 (!ctxt->disableSAX))
11650 ctxt->sax->endElement(ctxt->userData, name);
11651 #endif /* LIBXML_SAX1_ENABLED */
11652 }
11653 if (ctxt->instate == XML_PARSER_EOF)
11654 goto done;
11655 spacePop(ctxt);
11656 if (ctxt->nameNr == 0) {
11657 ctxt->instate = XML_PARSER_EPILOG;
11658 } else {
11659 ctxt->instate = XML_PARSER_CONTENT;
11660 }
11661 ctxt->progressive = 1;
11662 break;
11663 }
11664 if (RAW == '>') {
11665 NEXT;
11666 } else {
11667 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11668 "Couldn't find end of Start Tag %s\n",
11669 name);
11670 nodePop(ctxt);
11671 spacePop(ctxt);
11672 }
11673 if (ctxt->sax2)
11674 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11675 #ifdef LIBXML_SAX1_ENABLED
11676 else
11677 namePush(ctxt, name);
11678 #endif /* LIBXML_SAX1_ENABLED */
11679
11680 ctxt->instate = XML_PARSER_CONTENT;
11681 ctxt->progressive = 1;
11682 break;
11683 }
11684 case XML_PARSER_CONTENT: {
11685 const xmlChar *test;
11686 unsigned int cons;
11687 if ((avail < 2) && (ctxt->inputNr == 1))
11688 goto done;
11689 cur = ctxt->input->cur[0];
11690 next = ctxt->input->cur[1];
11691
11692 test = CUR_PTR;
11693 cons = ctxt->input->consumed;
11694 if ((cur == '<') && (next == '/')) {
11695 ctxt->instate = XML_PARSER_END_TAG;
11696 break;
11697 } else if ((cur == '<') && (next == '?')) {
11698 if ((!terminate) &&
11699 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11700 ctxt->progressive = XML_PARSER_PI;
11701 goto done;
11702 }
11703 xmlParsePI(ctxt);
11704 ctxt->instate = XML_PARSER_CONTENT;
11705 ctxt->progressive = 1;
11706 } else if ((cur == '<') && (next != '!')) {
11707 ctxt->instate = XML_PARSER_START_TAG;
11708 break;
11709 } else if ((cur == '<') && (next == '!') &&
11710 (ctxt->input->cur[2] == '-') &&
11711 (ctxt->input->cur[3] == '-')) {
11712 int term;
11713
11714 if (avail < 4)
11715 goto done;
11716 ctxt->input->cur += 4;
11717 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11718 ctxt->input->cur -= 4;
11719 if ((!terminate) && (term < 0)) {
11720 ctxt->progressive = XML_PARSER_COMMENT;
11721 goto done;
11722 }
11723 xmlParseComment(ctxt);
11724 ctxt->instate = XML_PARSER_CONTENT;
11725 ctxt->progressive = 1;
11726 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11727 (ctxt->input->cur[2] == '[') &&
11728 (ctxt->input->cur[3] == 'C') &&
11729 (ctxt->input->cur[4] == 'D') &&
11730 (ctxt->input->cur[5] == 'A') &&
11731 (ctxt->input->cur[6] == 'T') &&
11732 (ctxt->input->cur[7] == 'A') &&
11733 (ctxt->input->cur[8] == '[')) {
11734 SKIP(9);
11735 ctxt->instate = XML_PARSER_CDATA_SECTION;
11736 break;
11737 } else if ((cur == '<') && (next == '!') &&
11738 (avail < 9)) {
11739 goto done;
11740 } else if (cur == '&') {
11741 if ((!terminate) &&
11742 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11743 goto done;
11744 xmlParseReference(ctxt);
11745 } else {
11746 /* TODO Avoid the extra copy, handle directly !!! */
11747 /*
11748 * Goal of the following test is:
11749 * - minimize calls to the SAX 'character' callback
11750 * when they are mergeable
                 *  - handle a problem for isBlank when we only parse
11752 * a sequence of blank chars and the next one is
11753 * not available to check against '<' presence.
11754 * - tries to homogenize the differences in SAX
11755 * callbacks between the push and pull versions
11756 * of the parser.
11757 */
11758 if ((ctxt->inputNr == 1) &&
11759 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11760 if (!terminate) {
11761 if (ctxt->progressive) {
11762 if ((lastlt == NULL) ||
11763 (ctxt->input->cur > lastlt))
11764 goto done;
11765 } else if (xmlParseLookupSequence(ctxt,
11766 '<', 0, 0) < 0) {
11767 goto done;
11768 }
11769 }
11770 }
11771 ctxt->checkIndex = 0;
11772 xmlParseCharData(ctxt, 0);
11773 }
11774 /*
11775 * Pop-up of finished entities.
11776 */
11777 while ((RAW == 0) && (ctxt->inputNr > 1))
11778 xmlPopInput(ctxt);
11779 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11780 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11781 "detected an error in element content\n");
11782 xmlHaltParser(ctxt);
11783 break;
11784 }
11785 break;
11786 }
11787 case XML_PARSER_END_TAG:
11788 if (avail < 2)
11789 goto done;
11790 if (!terminate) {
11791 if (ctxt->progressive) {
11792 /* > can be found unescaped in attribute values */
11793 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11794 goto done;
11795 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11796 goto done;
11797 }
11798 }
11799 if (ctxt->sax2) {
11800 xmlParseEndTag2(ctxt,
11801 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11802 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11803 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11804 nameNsPop(ctxt);
11805 }
11806 #ifdef LIBXML_SAX1_ENABLED
11807 else
11808 xmlParseEndTag1(ctxt, 0);
11809 #endif /* LIBXML_SAX1_ENABLED */
11810 if (ctxt->instate == XML_PARSER_EOF) {
11811 /* Nothing */
11812 } else if (ctxt->nameNr == 0) {
11813 ctxt->instate = XML_PARSER_EPILOG;
11814 } else {
11815 ctxt->instate = XML_PARSER_CONTENT;
11816 }
11817 break;
11818 case XML_PARSER_CDATA_SECTION: {
11819 /*
11820 * The Push mode need to have the SAX callback for
11821 * cdataBlock merge back contiguous callbacks.
11822 */
11823 int base;
11824
11825 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11826 if (base < 0) {
11827 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11828 int tmp;
11829
11830 tmp = xmlCheckCdataPush(ctxt->input->cur,
11831 XML_PARSER_BIG_BUFFER_SIZE, 0);
11832 if (tmp < 0) {
11833 tmp = -tmp;
11834 ctxt->input->cur += tmp;
11835 goto encoding_error;
11836 }
11837 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11838 if (ctxt->sax->cdataBlock != NULL)
11839 ctxt->sax->cdataBlock(ctxt->userData,
11840 ctxt->input->cur, tmp);
11841 else if (ctxt->sax->characters != NULL)
11842 ctxt->sax->characters(ctxt->userData,
11843 ctxt->input->cur, tmp);
11844 }
11845 if (ctxt->instate == XML_PARSER_EOF)
11846 goto done;
11847 SKIPL(tmp);
11848 ctxt->checkIndex = 0;
11849 }
11850 goto done;
11851 } else {
11852 int tmp;
11853
11854 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11855 if ((tmp < 0) || (tmp != base)) {
11856 tmp = -tmp;
11857 ctxt->input->cur += tmp;
11858 goto encoding_error;
11859 }
11860 if ((ctxt->sax != NULL) && (base == 0) &&
11861 (ctxt->sax->cdataBlock != NULL) &&
11862 (!ctxt->disableSAX)) {
11863 /*
11864 * Special case to provide identical behaviour
                     * between pull and push parsers on empty CDATA
11866 * sections
11867 */
11868 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11869 (!strncmp((const char *)&ctxt->input->cur[-9],
11870 "<![CDATA[", 9)))
11871 ctxt->sax->cdataBlock(ctxt->userData,
11872 BAD_CAST "", 0);
11873 } else if ((ctxt->sax != NULL) && (base > 0) &&
11874 (!ctxt->disableSAX)) {
11875 if (ctxt->sax->cdataBlock != NULL)
11876 ctxt->sax->cdataBlock(ctxt->userData,
11877 ctxt->input->cur, base);
11878 else if (ctxt->sax->characters != NULL)
11879 ctxt->sax->characters(ctxt->userData,
11880 ctxt->input->cur, base);
11881 }
11882 if (ctxt->instate == XML_PARSER_EOF)
11883 goto done;
11884 SKIPL(base + 3);
11885 ctxt->checkIndex = 0;
11886 ctxt->instate = XML_PARSER_CONTENT;
11887 #ifdef DEBUG_PUSH
11888 xmlGenericError(xmlGenericErrorContext,
11889 "PP: entering CONTENT\n");
11890 #endif
11891 }
11892 break;
11893 }
11894 case XML_PARSER_MISC:
11895 SKIP_BLANKS;
11896 if (ctxt->input->buf == NULL)
11897 avail = ctxt->input->length -
11898 (ctxt->input->cur - ctxt->input->base);
11899 else
11900 avail = xmlBufUse(ctxt->input->buf->buffer) -
11901 (ctxt->input->cur - ctxt->input->base);
11902 if (avail < 2)
11903 goto done;
11904 cur = ctxt->input->cur[0];
11905 next = ctxt->input->cur[1];
11906 if ((cur == '<') && (next == '?')) {
11907 if ((!terminate) &&
11908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909 ctxt->progressive = XML_PARSER_PI;
11910 goto done;
11911 }
11912 #ifdef DEBUG_PUSH
11913 xmlGenericError(xmlGenericErrorContext,
11914 "PP: Parsing PI\n");
11915 #endif
11916 xmlParsePI(ctxt);
11917 if (ctxt->instate == XML_PARSER_EOF)
11918 goto done;
11919 ctxt->instate = XML_PARSER_MISC;
11920 ctxt->progressive = 1;
11921 ctxt->checkIndex = 0;
11922 } else if ((cur == '<') && (next == '!') &&
11923 (ctxt->input->cur[2] == '-') &&
11924 (ctxt->input->cur[3] == '-')) {
11925 if ((!terminate) &&
11926 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11927 ctxt->progressive = XML_PARSER_COMMENT;
11928 goto done;
11929 }
11930 #ifdef DEBUG_PUSH
11931 xmlGenericError(xmlGenericErrorContext,
11932 "PP: Parsing Comment\n");
11933 #endif
11934 xmlParseComment(ctxt);
11935 if (ctxt->instate == XML_PARSER_EOF)
11936 goto done;
11937 ctxt->instate = XML_PARSER_MISC;
11938 ctxt->progressive = 1;
11939 ctxt->checkIndex = 0;
11940 } else if ((cur == '<') && (next == '!') &&
11941 (ctxt->input->cur[2] == 'D') &&
11942 (ctxt->input->cur[3] == 'O') &&
11943 (ctxt->input->cur[4] == 'C') &&
11944 (ctxt->input->cur[5] == 'T') &&
11945 (ctxt->input->cur[6] == 'Y') &&
11946 (ctxt->input->cur[7] == 'P') &&
11947 (ctxt->input->cur[8] == 'E')) {
11948 if ((!terminate) &&
11949 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11950 ctxt->progressive = XML_PARSER_DTD;
11951 goto done;
11952 }
11953 #ifdef DEBUG_PUSH
11954 xmlGenericError(xmlGenericErrorContext,
11955 "PP: Parsing internal subset\n");
11956 #endif
11957 ctxt->inSubset = 1;
11958 ctxt->progressive = 0;
11959 ctxt->checkIndex = 0;
11960 xmlParseDocTypeDecl(ctxt);
11961 if (ctxt->instate == XML_PARSER_EOF)
11962 goto done;
11963 if (RAW == '[') {
11964 ctxt->instate = XML_PARSER_DTD;
11965 #ifdef DEBUG_PUSH
11966 xmlGenericError(xmlGenericErrorContext,
11967 "PP: entering DTD\n");
11968 #endif
11969 } else {
11970 /*
11971 * Create and update the external subset.
11972 */
11973 ctxt->inSubset = 2;
11974 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11975 (ctxt->sax->externalSubset != NULL))
11976 ctxt->sax->externalSubset(ctxt->userData,
11977 ctxt->intSubName, ctxt->extSubSystem,
11978 ctxt->extSubURI);
11979 ctxt->inSubset = 0;
11980 xmlCleanSpecialAttr(ctxt);
11981 ctxt->instate = XML_PARSER_PROLOG;
11982 #ifdef DEBUG_PUSH
11983 xmlGenericError(xmlGenericErrorContext,
11984 "PP: entering PROLOG\n");
11985 #endif
11986 }
11987 } else if ((cur == '<') && (next == '!') &&
11988 (avail < 9)) {
11989 goto done;
11990 } else {
11991 ctxt->instate = XML_PARSER_START_TAG;
11992 ctxt->progressive = XML_PARSER_START_TAG;
11993 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11994 #ifdef DEBUG_PUSH
11995 xmlGenericError(xmlGenericErrorContext,
11996 "PP: entering START_TAG\n");
11997 #endif
11998 }
11999 break;
12000 case XML_PARSER_PROLOG:
12001 SKIP_BLANKS;
12002 if (ctxt->input->buf == NULL)
12003 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12004 else
12005 avail = xmlBufUse(ctxt->input->buf->buffer) -
12006 (ctxt->input->cur - ctxt->input->base);
12007 if (avail < 2)
12008 goto done;
12009 cur = ctxt->input->cur[0];
12010 next = ctxt->input->cur[1];
12011 if ((cur == '<') && (next == '?')) {
12012 if ((!terminate) &&
12013 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12014 ctxt->progressive = XML_PARSER_PI;
12015 goto done;
12016 }
12017 #ifdef DEBUG_PUSH
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: Parsing PI\n");
12020 #endif
12021 xmlParsePI(ctxt);
12022 if (ctxt->instate == XML_PARSER_EOF)
12023 goto done;
12024 ctxt->instate = XML_PARSER_PROLOG;
12025 ctxt->progressive = 1;
12026 } else if ((cur == '<') && (next == '!') &&
12027 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12028 if ((!terminate) &&
12029 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12030 ctxt->progressive = XML_PARSER_COMMENT;
12031 goto done;
12032 }
12033 #ifdef DEBUG_PUSH
12034 xmlGenericError(xmlGenericErrorContext,
12035 "PP: Parsing Comment\n");
12036 #endif
12037 xmlParseComment(ctxt);
12038 if (ctxt->instate == XML_PARSER_EOF)
12039 goto done;
12040 ctxt->instate = XML_PARSER_PROLOG;
12041 ctxt->progressive = 1;
12042 } else if ((cur == '<') && (next == '!') &&
12043 (avail < 4)) {
12044 goto done;
12045 } else {
12046 ctxt->instate = XML_PARSER_START_TAG;
12047 if (ctxt->progressive == 0)
12048 ctxt->progressive = XML_PARSER_START_TAG;
12049 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
12050 #ifdef DEBUG_PUSH
12051 xmlGenericError(xmlGenericErrorContext,
12052 "PP: entering START_TAG\n");
12053 #endif
12054 }
12055 break;
12056 case XML_PARSER_EPILOG:
12057 SKIP_BLANKS;
12058 if (ctxt->input->buf == NULL)
12059 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12060 else
12061 avail = xmlBufUse(ctxt->input->buf->buffer) -
12062 (ctxt->input->cur - ctxt->input->base);
12063 if (avail < 2)
12064 goto done;
12065 cur = ctxt->input->cur[0];
12066 next = ctxt->input->cur[1];
12067 if ((cur == '<') && (next == '?')) {
12068 if ((!terminate) &&
12069 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12070 ctxt->progressive = XML_PARSER_PI;
12071 goto done;
12072 }
12073 #ifdef DEBUG_PUSH
12074 xmlGenericError(xmlGenericErrorContext,
12075 "PP: Parsing PI\n");
12076 #endif
12077 xmlParsePI(ctxt);
12078 if (ctxt->instate == XML_PARSER_EOF)
12079 goto done;
12080 ctxt->instate = XML_PARSER_EPILOG;
12081 ctxt->progressive = 1;
12082 } else if ((cur == '<') && (next == '!') &&
12083 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12084 if ((!terminate) &&
12085 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12086 ctxt->progressive = XML_PARSER_COMMENT;
12087 goto done;
12088 }
12089 #ifdef DEBUG_PUSH
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: Parsing Comment\n");
12092 #endif
12093 xmlParseComment(ctxt);
12094 if (ctxt->instate == XML_PARSER_EOF)
12095 goto done;
12096 ctxt->instate = XML_PARSER_EPILOG;
12097 ctxt->progressive = 1;
12098 } else if ((cur == '<') && (next == '!') &&
12099 (avail < 4)) {
12100 goto done;
12101 } else {
12102 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12103 xmlHaltParser(ctxt);
12104 #ifdef DEBUG_PUSH
12105 xmlGenericError(xmlGenericErrorContext,
12106 "PP: entering EOF\n");
12107 #endif
12108 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12109 ctxt->sax->endDocument(ctxt->userData);
12110 goto done;
12111 }
12112 break;
12113 case XML_PARSER_DTD: {
12114 /*
12115 * Sorry but progressive parsing of the internal subset
12116 * is not expected to be supported. We first check that
12117 * the full content of the internal subset is available and
12118 * the parsing is launched only at that point.
12119 * Internal subset ends up with "']' S? '>'" in an unescaped
12120 * section and not in a ']]>' sequence which are conditional
12121 * sections (whoever argued to keep that crap in XML deserve
12122 * a place in hell !).
12123 */
12124 int base, i;
12125 xmlChar *buf;
12126 xmlChar quote = 0;
12127 size_t use;
12128
12129 base = ctxt->input->cur - ctxt->input->base;
12130 if (base < 0) return(0);
12131 if (ctxt->checkIndex > base)
12132 base = ctxt->checkIndex;
12133 buf = xmlBufContent(ctxt->input->buf->buffer);
12134 use = xmlBufUse(ctxt->input->buf->buffer);
12135 for (;(unsigned int) base < use; base++) {
12136 if (quote != 0) {
12137 if (buf[base] == quote)
12138 quote = 0;
12139 continue;
12140 }
12141 if ((quote == 0) && (buf[base] == '<')) {
12142 int found = 0;
12143 /* special handling of comments */
12144 if (((unsigned int) base + 4 < use) &&
12145 (buf[base + 1] == '!') &&
12146 (buf[base + 2] == '-') &&
12147 (buf[base + 3] == '-')) {
12148 for (;(unsigned int) base + 3 < use; base++) {
12149 if ((buf[base] == '-') &&
12150 (buf[base + 1] == '-') &&
12151 (buf[base + 2] == '>')) {
12152 found = 1;
12153 base += 2;
12154 break;
12155 }
12156 }
12157 if (!found) {
12158 #if 0
12159 fprintf(stderr, "unfinished comment\n");
12160 #endif
12161 break; /* for */
12162 }
12163 continue;
12164 }
12165 }
12166 if (buf[base] == '"') {
12167 quote = '"';
12168 continue;
12169 }
12170 if (buf[base] == '\'') {
12171 quote = '\'';
12172 continue;
12173 }
12174 if (buf[base] == ']') {
12175 #if 0
12176 fprintf(stderr, "%c%c%c%c: ", buf[base],
12177 buf[base + 1], buf[base + 2], buf[base + 3]);
12178 #endif
12179 if ((unsigned int) base +1 >= use)
12180 break;
12181 if (buf[base + 1] == ']') {
12182 /* conditional crap, skip both ']' ! */
12183 base++;
12184 continue;
12185 }
12186 for (i = 1; (unsigned int) base + i < use; i++) {
12187 if (buf[base + i] == '>') {
12188 #if 0
12189 fprintf(stderr, "found\n");
12190 #endif
12191 goto found_end_int_subset;
12192 }
12193 if (!IS_BLANK_CH(buf[base + i])) {
12194 #if 0
12195 fprintf(stderr, "not found\n");
12196 #endif
12197 goto not_end_of_int_subset;
12198 }
12199 }
12200 #if 0
12201 fprintf(stderr, "end of stream\n");
12202 #endif
12203 break;
12204
12205 }
12206 not_end_of_int_subset:
12207 continue; /* for */
12208 }
12209 /*
12210 * We didn't found the end of the Internal subset
12211 */
12212 if (quote == 0)
12213 ctxt->checkIndex = base;
12214 else
12215 ctxt->checkIndex = 0;
12216 #ifdef DEBUG_PUSH
12217 if (next == 0)
12218 xmlGenericError(xmlGenericErrorContext,
12219 "PP: lookup of int subset end filed\n");
12220 #endif
12221 goto done;
12222
12223 found_end_int_subset:
12224 ctxt->checkIndex = 0;
12225 xmlParseInternalSubset(ctxt);
12226 if (ctxt->instate == XML_PARSER_EOF)
12227 goto done;
12228 ctxt->inSubset = 2;
12229 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12230 (ctxt->sax->externalSubset != NULL))
12231 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12232 ctxt->extSubSystem, ctxt->extSubURI);
12233 ctxt->inSubset = 0;
12234 xmlCleanSpecialAttr(ctxt);
12235 if (ctxt->instate == XML_PARSER_EOF)
12236 goto done;
12237 ctxt->instate = XML_PARSER_PROLOG;
12238 ctxt->checkIndex = 0;
12239 #ifdef DEBUG_PUSH
12240 xmlGenericError(xmlGenericErrorContext,
12241 "PP: entering PROLOG\n");
12242 #endif
12243 break;
12244 }
12245 case XML_PARSER_COMMENT:
12246 xmlGenericError(xmlGenericErrorContext,
12247 "PP: internal error, state == COMMENT\n");
12248 ctxt->instate = XML_PARSER_CONTENT;
12249 #ifdef DEBUG_PUSH
12250 xmlGenericError(xmlGenericErrorContext,
12251 "PP: entering CONTENT\n");
12252 #endif
12253 break;
12254 case XML_PARSER_IGNORE:
12255 xmlGenericError(xmlGenericErrorContext,
12256 "PP: internal error, state == IGNORE");
12257 ctxt->instate = XML_PARSER_DTD;
12258 #ifdef DEBUG_PUSH
12259 xmlGenericError(xmlGenericErrorContext,
12260 "PP: entering DTD\n");
12261 #endif
12262 break;
12263 case XML_PARSER_PI:
12264 xmlGenericError(xmlGenericErrorContext,
12265 "PP: internal error, state == PI\n");
12266 ctxt->instate = XML_PARSER_CONTENT;
12267 #ifdef DEBUG_PUSH
12268 xmlGenericError(xmlGenericErrorContext,
12269 "PP: entering CONTENT\n");
12270 #endif
12271 break;
12272 case XML_PARSER_ENTITY_DECL:
12273 xmlGenericError(xmlGenericErrorContext,
12274 "PP: internal error, state == ENTITY_DECL\n");
12275 ctxt->instate = XML_PARSER_DTD;
12276 #ifdef DEBUG_PUSH
12277 xmlGenericError(xmlGenericErrorContext,
12278 "PP: entering DTD\n");
12279 #endif
12280 break;
12281 case XML_PARSER_ENTITY_VALUE:
12282 xmlGenericError(xmlGenericErrorContext,
12283 "PP: internal error, state == ENTITY_VALUE\n");
12284 ctxt->instate = XML_PARSER_CONTENT;
12285 #ifdef DEBUG_PUSH
12286 xmlGenericError(xmlGenericErrorContext,
12287 "PP: entering DTD\n");
12288 #endif
12289 break;
12290 case XML_PARSER_ATTRIBUTE_VALUE:
12291 xmlGenericError(xmlGenericErrorContext,
12292 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12293 ctxt->instate = XML_PARSER_START_TAG;
12294 #ifdef DEBUG_PUSH
12295 xmlGenericError(xmlGenericErrorContext,
12296 "PP: entering START_TAG\n");
12297 #endif
12298 break;
12299 case XML_PARSER_SYSTEM_LITERAL:
12300 xmlGenericError(xmlGenericErrorContext,
12301 "PP: internal error, state == SYSTEM_LITERAL\n");
12302 ctxt->instate = XML_PARSER_START_TAG;
12303 #ifdef DEBUG_PUSH
12304 xmlGenericError(xmlGenericErrorContext,
12305 "PP: entering START_TAG\n");
12306 #endif
12307 break;
12308 case XML_PARSER_PUBLIC_LITERAL:
12309 xmlGenericError(xmlGenericErrorContext,
12310 "PP: internal error, state == PUBLIC_LITERAL\n");
12311 ctxt->instate = XML_PARSER_START_TAG;
12312 #ifdef DEBUG_PUSH
12313 xmlGenericError(xmlGenericErrorContext,
12314 "PP: entering START_TAG\n");
12315 #endif
12316 break;
12317 }
12318 }
12319 done:
12320 #ifdef DEBUG_PUSH
12321 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12322 #endif
12323 return(ret);
12324 encoding_error:
12325 {
12326 char buffer[150];
12327
12328 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12329 ctxt->input->cur[0], ctxt->input->cur[1],
12330 ctxt->input->cur[2], ctxt->input->cur[3]);
12331 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12332 "Input is not proper UTF-8, indicate encoding !\n%s",
12333 BAD_CAST buffer, NULL);
12334 }
12335 return(0);
12336 }
12337
12338 /**
12339 * xmlParseCheckTransition:
12340 * @ctxt: an XML parser context
12341 * @chunk: a char array
12342 * @size: the size in byte of the chunk
12343 *
12344 * Check depending on the current parser state if the chunk given must be
12345 * processed immediately or one need more data to advance on parsing.
12346 *
12347 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12348 */
12349 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12350 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12351 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12352 return(-1);
12353 if (ctxt->instate == XML_PARSER_START_TAG) {
12354 if (memchr(chunk, '>', size) != NULL)
12355 return(1);
12356 return(0);
12357 }
12358 if (ctxt->progressive == XML_PARSER_COMMENT) {
12359 if (memchr(chunk, '>', size) != NULL)
12360 return(1);
12361 return(0);
12362 }
12363 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12364 if (memchr(chunk, '>', size) != NULL)
12365 return(1);
12366 return(0);
12367 }
12368 if (ctxt->progressive == XML_PARSER_PI) {
12369 if (memchr(chunk, '>', size) != NULL)
12370 return(1);
12371 return(0);
12372 }
12373 if (ctxt->instate == XML_PARSER_END_TAG) {
12374 if (memchr(chunk, '>', size) != NULL)
12375 return(1);
12376 return(0);
12377 }
12378 if ((ctxt->progressive == XML_PARSER_DTD) ||
12379 (ctxt->instate == XML_PARSER_DTD)) {
12380 if (memchr(chunk, '>', size) != NULL)
12381 return(1);
12382 return(0);
12383 }
12384 return(1);
12385 }
12386
12387 /**
12388 * xmlParseChunk:
12389 * @ctxt: an XML parser context
12390 * @chunk: an char array
12391 * @size: the size in byte of the chunk
12392 * @terminate: last chunk indicator
12393 *
12394 * Parse a Chunk of memory
12395 *
12396 * Returns zero if no error, the xmlParserErrors otherwise.
12397 */
12398 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12399 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12400 int terminate) {
12401 int end_in_lf = 0;
12402 int remain = 0;
12403 size_t old_avail = 0;
12404 size_t avail = 0;
12405
12406 if (ctxt == NULL)
12407 return(XML_ERR_INTERNAL_ERROR);
12408 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12409 return(ctxt->errNo);
12410 if (ctxt->instate == XML_PARSER_EOF)
12411 return(-1);
12412 if (ctxt->instate == XML_PARSER_START)
12413 xmlDetectSAX2(ctxt);
12414 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12415 (chunk[size - 1] == '\r')) {
12416 end_in_lf = 1;
12417 size--;
12418 }
12419
12420 xmldecl_done:
12421
12422 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12423 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12425 size_t cur = ctxt->input->cur - ctxt->input->base;
12426 int res;
12427
12428 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12429 /*
12430 * Specific handling if we autodetected an encoding, we should not
12431 * push more than the first line ... which depend on the encoding
12432 * And only push the rest once the final encoding was detected
12433 */
12434 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12435 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12436 unsigned int len = 45;
12437
12438 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12439 BAD_CAST "UTF-16")) ||
12440 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12441 BAD_CAST "UTF16")))
12442 len = 90;
12443 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12444 BAD_CAST "UCS-4")) ||
12445 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12446 BAD_CAST "UCS4")))
12447 len = 180;
12448
12449 if (ctxt->input->buf->rawconsumed < len)
12450 len -= ctxt->input->buf->rawconsumed;
12451
12452 /*
12453 * Change size for reading the initial declaration only
12454 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12455 * will blindly copy extra bytes from memory.
12456 */
12457 if ((unsigned int) size > len) {
12458 remain = size - len;
12459 size = len;
12460 } else {
12461 remain = 0;
12462 }
12463 }
12464 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12465 if (res < 0) {
12466 ctxt->errNo = XML_PARSER_EOF;
12467 xmlHaltParser(ctxt);
12468 return (XML_PARSER_EOF);
12469 }
12470 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12471 #ifdef DEBUG_PUSH
12472 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12473 #endif
12474
12475 } else if (ctxt->instate != XML_PARSER_EOF) {
12476 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12477 xmlParserInputBufferPtr in = ctxt->input->buf;
12478 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12479 (in->raw != NULL)) {
12480 int nbchars;
12481 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12482 size_t current = ctxt->input->cur - ctxt->input->base;
12483
12484 nbchars = xmlCharEncInput(in, terminate);
12485 if (nbchars < 0) {
12486 /* TODO 2.6.0 */
12487 xmlGenericError(xmlGenericErrorContext,
12488 "xmlParseChunk: encoder error\n");
12489 return(XML_ERR_INVALID_ENCODING);
12490 }
12491 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12492 }
12493 }
12494 }
12495 if (remain != 0) {
12496 xmlParseTryOrFinish(ctxt, 0);
12497 } else {
12498 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12499 avail = xmlBufUse(ctxt->input->buf->buffer);
12500 /*
12501 * Depending on the current state it may not be such
12502 * a good idea to try parsing if there is nothing in the chunk
12503 * which would be worth doing a parser state transition and we
12504 * need to wait for more data
12505 */
12506 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12507 (old_avail == 0) || (avail == 0) ||
12508 (xmlParseCheckTransition(ctxt,
12509 (const char *)&ctxt->input->base[old_avail],
12510 avail - old_avail)))
12511 xmlParseTryOrFinish(ctxt, terminate);
12512 }
12513 if (ctxt->instate == XML_PARSER_EOF)
12514 return(ctxt->errNo);
12515
12516 if ((ctxt->input != NULL) &&
12517 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12518 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12519 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12520 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12521 xmlHaltParser(ctxt);
12522 }
12523 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12524 return(ctxt->errNo);
12525
12526 if (remain != 0) {
12527 chunk += size;
12528 size = remain;
12529 remain = 0;
12530 goto xmldecl_done;
12531 }
12532 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12533 (ctxt->input->buf != NULL)) {
12534 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12535 ctxt->input);
12536 size_t current = ctxt->input->cur - ctxt->input->base;
12537
12538 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12539
12540 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12541 base, current);
12542 }
12543 if (terminate) {
12544 /*
12545 * Check for termination
12546 */
12547 int cur_avail = 0;
12548
12549 if (ctxt->input != NULL) {
12550 if (ctxt->input->buf == NULL)
12551 cur_avail = ctxt->input->length -
12552 (ctxt->input->cur - ctxt->input->base);
12553 else
12554 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12555 (ctxt->input->cur - ctxt->input->base);
12556 }
12557
12558 if ((ctxt->instate != XML_PARSER_EOF) &&
12559 (ctxt->instate != XML_PARSER_EPILOG)) {
12560 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12561 }
12562 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12563 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12564 }
12565 if (ctxt->instate != XML_PARSER_EOF) {
12566 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12567 ctxt->sax->endDocument(ctxt->userData);
12568 }
12569 ctxt->instate = XML_PARSER_EOF;
12570 }
12571 if (ctxt->wellFormed == 0)
12572 return((xmlParserErrors) ctxt->errNo);
12573 else
12574 return(0);
12575 }
12576
12577 /************************************************************************
12578 * *
12579 * I/O front end functions to the parser *
12580 * *
12581 ************************************************************************/
12582
12583 /**
12584 * xmlCreatePushParserCtxt:
12585 * @sax: a SAX handler
12586 * @user_data: The user data returned on SAX callbacks
12587 * @chunk: a pointer to an array of chars
12588 * @size: number of chars in the array
12589 * @filename: an optional file name or URI
12590 *
12591 * Create a parser context for using the XML parser in push mode.
12592 * If @buffer and @size are non-NULL, the data is used to detect
12593 * the encoding. The remaining characters will be parsed so they
12594 * don't need to be fed in again through xmlParseChunk.
12595 * To allow content encoding detection, @size should be >= 4
12596 * The value of @filename is used for fetching external entities
12597 * and error/warning reports.
12598 *
12599 * Returns the new parser context or NULL
12600 */
12601
12602 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12603 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12604 const char *chunk, int size, const char *filename) {
12605 xmlParserCtxtPtr ctxt;
12606 xmlParserInputPtr inputStream;
12607 xmlParserInputBufferPtr buf;
12608 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12609
12610 /*
12611 * plug some encoding conversion routines
12612 */
12613 if ((chunk != NULL) && (size >= 4))
12614 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12615
12616 buf = xmlAllocParserInputBuffer(enc);
12617 if (buf == NULL) return(NULL);
12618
12619 ctxt = xmlNewParserCtxt();
12620 if (ctxt == NULL) {
12621 xmlErrMemory(NULL, "creating parser: out of memory\n");
12622 xmlFreeParserInputBuffer(buf);
12623 return(NULL);
12624 }
12625 ctxt->dictNames = 1;
12626 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12627 if (ctxt->pushTab == NULL) {
12628 xmlErrMemory(ctxt, NULL);
12629 xmlFreeParserInputBuffer(buf);
12630 xmlFreeParserCtxt(ctxt);
12631 return(NULL);
12632 }
12633 if (sax != NULL) {
12634 #ifdef LIBXML_SAX1_ENABLED
12635 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12636 #endif /* LIBXML_SAX1_ENABLED */
12637 xmlFree(ctxt->sax);
12638 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12639 if (ctxt->sax == NULL) {
12640 xmlErrMemory(ctxt, NULL);
12641 xmlFreeParserInputBuffer(buf);
12642 xmlFreeParserCtxt(ctxt);
12643 return(NULL);
12644 }
12645 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12646 if (sax->initialized == XML_SAX2_MAGIC)
12647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12648 else
12649 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12650 if (user_data != NULL)
12651 ctxt->userData = user_data;
12652 }
12653 if (filename == NULL) {
12654 ctxt->directory = NULL;
12655 } else {
12656 ctxt->directory = xmlParserGetDirectory(filename);
12657 }
12658
12659 inputStream = xmlNewInputStream(ctxt);
12660 if (inputStream == NULL) {
12661 xmlFreeParserCtxt(ctxt);
12662 xmlFreeParserInputBuffer(buf);
12663 return(NULL);
12664 }
12665
12666 if (filename == NULL)
12667 inputStream->filename = NULL;
12668 else {
12669 inputStream->filename = (char *)
12670 xmlCanonicPath((const xmlChar *) filename);
12671 if (inputStream->filename == NULL) {
12672 xmlFreeParserCtxt(ctxt);
12673 xmlFreeParserInputBuffer(buf);
12674 return(NULL);
12675 }
12676 }
12677 inputStream->buf = buf;
12678 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12679 inputPush(ctxt, inputStream);
12680
12681 /*
12682 * If the caller didn't provide an initial 'chunk' for determining
12683 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12684 * that it can be automatically determined later
12685 */
12686 if ((size == 0) || (chunk == NULL)) {
12687 ctxt->charset = XML_CHAR_ENCODING_NONE;
12688 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12689 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12690 size_t cur = ctxt->input->cur - ctxt->input->base;
12691
12692 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12693
12694 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12695 #ifdef DEBUG_PUSH
12696 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12697 #endif
12698 }
12699
12700 if (enc != XML_CHAR_ENCODING_NONE) {
12701 xmlSwitchEncoding(ctxt, enc);
12702 }
12703
12704 return(ctxt);
12705 }
12706 #endif /* LIBXML_PUSH_ENABLED */
12707
12708 /**
12709 * xmlHaltParser:
12710 * @ctxt: an XML parser context
12711 *
12712 * Blocks further parser processing don't override error
12713 * for internal use
12714 */
12715 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12716 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12717 if (ctxt == NULL)
12718 return;
12719 ctxt->instate = XML_PARSER_EOF;
12720 ctxt->disableSAX = 1;
12721 if (ctxt->input != NULL) {
12722 /*
12723 * in case there was a specific allocation deallocate before
12724 * overriding base
12725 */
12726 if (ctxt->input->free != NULL) {
12727 ctxt->input->free((xmlChar *) ctxt->input->base);
12728 ctxt->input->free = NULL;
12729 }
12730 ctxt->input->cur = BAD_CAST"";
12731 ctxt->input->base = ctxt->input->cur;
12732 }
12733 }
12734
12735 /**
12736 * xmlStopParser:
12737 * @ctxt: an XML parser context
12738 *
12739 * Blocks further parser processing
12740 */
12741 void
xmlStopParser(xmlParserCtxtPtr ctxt)12742 xmlStopParser(xmlParserCtxtPtr ctxt) {
12743 if (ctxt == NULL)
12744 return;
12745 xmlHaltParser(ctxt);
12746 ctxt->errNo = XML_ERR_USER_STOP;
12747 }
12748
12749 /**
12750 * xmlCreateIOParserCtxt:
12751 * @sax: a SAX handler
12752 * @user_data: The user data returned on SAX callbacks
12753 * @ioread: an I/O read function
12754 * @ioclose: an I/O close function
12755 * @ioctx: an I/O handler
12756 * @enc: the charset encoding if known
12757 *
12758 * Create a parser context for using the XML parser with an existing
12759 * I/O stream
12760 *
12761 * Returns the new parser context or NULL
12762 */
12763 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12764 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12765 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12766 void *ioctx, xmlCharEncoding enc) {
12767 xmlParserCtxtPtr ctxt;
12768 xmlParserInputPtr inputStream;
12769 xmlParserInputBufferPtr buf;
12770
12771 if (ioread == NULL) return(NULL);
12772
12773 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12774 if (buf == NULL) {
12775 if (ioclose != NULL)
12776 ioclose(ioctx);
12777 return (NULL);
12778 }
12779
12780 ctxt = xmlNewParserCtxt();
12781 if (ctxt == NULL) {
12782 xmlFreeParserInputBuffer(buf);
12783 return(NULL);
12784 }
12785 if (sax != NULL) {
12786 #ifdef LIBXML_SAX1_ENABLED
12787 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12788 #endif /* LIBXML_SAX1_ENABLED */
12789 xmlFree(ctxt->sax);
12790 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12791 if (ctxt->sax == NULL) {
12792 xmlErrMemory(ctxt, NULL);
12793 xmlFreeParserCtxt(ctxt);
12794 return(NULL);
12795 }
12796 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12797 if (sax->initialized == XML_SAX2_MAGIC)
12798 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12799 else
12800 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12801 if (user_data != NULL)
12802 ctxt->userData = user_data;
12803 }
12804
12805 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12806 if (inputStream == NULL) {
12807 xmlFreeParserCtxt(ctxt);
12808 return(NULL);
12809 }
12810 inputPush(ctxt, inputStream);
12811
12812 return(ctxt);
12813 }
12814
12815 #ifdef LIBXML_VALID_ENABLED
12816 /************************************************************************
12817 * *
12818 * Front ends when parsing a DTD *
12819 * *
12820 ************************************************************************/
12821
12822 /**
12823 * xmlIOParseDTD:
12824 * @sax: the SAX handler block or NULL
12825 * @input: an Input Buffer
12826 * @enc: the charset encoding if known
12827 *
12828 * Load and parse a DTD
12829 *
12830 * Returns the resulting xmlDtdPtr or NULL in case of error.
12831 * @input will be freed by the function in any case.
12832 */
12833
12834 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12835 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12836 xmlCharEncoding enc) {
12837 xmlDtdPtr ret = NULL;
12838 xmlParserCtxtPtr ctxt;
12839 xmlParserInputPtr pinput = NULL;
12840 xmlChar start[4];
12841
12842 if (input == NULL)
12843 return(NULL);
12844
12845 ctxt = xmlNewParserCtxt();
12846 if (ctxt == NULL) {
12847 xmlFreeParserInputBuffer(input);
12848 return(NULL);
12849 }
12850
12851 /* We are loading a DTD */
12852 ctxt->options |= XML_PARSE_DTDLOAD;
12853
12854 /*
12855 * Set-up the SAX context
12856 */
12857 if (sax != NULL) {
12858 if (ctxt->sax != NULL)
12859 xmlFree(ctxt->sax);
12860 ctxt->sax = sax;
12861 ctxt->userData = ctxt;
12862 }
12863 xmlDetectSAX2(ctxt);
12864
12865 /*
12866 * generate a parser input from the I/O handler
12867 */
12868
12869 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12870 if (pinput == NULL) {
12871 if (sax != NULL) ctxt->sax = NULL;
12872 xmlFreeParserInputBuffer(input);
12873 xmlFreeParserCtxt(ctxt);
12874 return(NULL);
12875 }
12876
12877 /*
12878 * plug some encoding conversion routines here.
12879 */
12880 if (xmlPushInput(ctxt, pinput) < 0) {
12881 if (sax != NULL) ctxt->sax = NULL;
12882 xmlFreeParserCtxt(ctxt);
12883 return(NULL);
12884 }
12885 if (enc != XML_CHAR_ENCODING_NONE) {
12886 xmlSwitchEncoding(ctxt, enc);
12887 }
12888
12889 pinput->filename = NULL;
12890 pinput->line = 1;
12891 pinput->col = 1;
12892 pinput->base = ctxt->input->cur;
12893 pinput->cur = ctxt->input->cur;
12894 pinput->free = NULL;
12895
12896 /*
12897 * let's parse that entity knowing it's an external subset.
12898 */
12899 ctxt->inSubset = 2;
12900 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12901 if (ctxt->myDoc == NULL) {
12902 xmlErrMemory(ctxt, "New Doc failed");
12903 return(NULL);
12904 }
12905 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12906 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12907 BAD_CAST "none", BAD_CAST "none");
12908
12909 if ((enc == XML_CHAR_ENCODING_NONE) &&
12910 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12911 /*
12912 * Get the 4 first bytes and decode the charset
12913 * if enc != XML_CHAR_ENCODING_NONE
12914 * plug some encoding conversion routines.
12915 */
12916 start[0] = RAW;
12917 start[1] = NXT(1);
12918 start[2] = NXT(2);
12919 start[3] = NXT(3);
12920 enc = xmlDetectCharEncoding(start, 4);
12921 if (enc != XML_CHAR_ENCODING_NONE) {
12922 xmlSwitchEncoding(ctxt, enc);
12923 }
12924 }
12925
12926 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12927
12928 if (ctxt->myDoc != NULL) {
12929 if (ctxt->wellFormed) {
12930 ret = ctxt->myDoc->extSubset;
12931 ctxt->myDoc->extSubset = NULL;
12932 if (ret != NULL) {
12933 xmlNodePtr tmp;
12934
12935 ret->doc = NULL;
12936 tmp = ret->children;
12937 while (tmp != NULL) {
12938 tmp->doc = NULL;
12939 tmp = tmp->next;
12940 }
12941 }
12942 } else {
12943 ret = NULL;
12944 }
12945 xmlFreeDoc(ctxt->myDoc);
12946 ctxt->myDoc = NULL;
12947 }
12948 if (sax != NULL) ctxt->sax = NULL;
12949 xmlFreeParserCtxt(ctxt);
12950
12951 return(ret);
12952 }
12953
12954 /**
12955 * xmlSAXParseDTD:
12956 * @sax: the SAX handler block
12957 * @ExternalID: a NAME* containing the External ID of the DTD
12958 * @SystemID: a NAME* containing the URL to the DTD
12959 *
12960 * Load and parse an external subset.
12961 *
12962 * Returns the resulting xmlDtdPtr or NULL in case of error.
12963 */
12964
12965 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12966 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12967 const xmlChar *SystemID) {
12968 xmlDtdPtr ret = NULL;
12969 xmlParserCtxtPtr ctxt;
12970 xmlParserInputPtr input = NULL;
12971 xmlCharEncoding enc;
12972 xmlChar* systemIdCanonic;
12973
12974 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12975
12976 ctxt = xmlNewParserCtxt();
12977 if (ctxt == NULL) {
12978 return(NULL);
12979 }
12980
12981 /* We are loading a DTD */
12982 ctxt->options |= XML_PARSE_DTDLOAD;
12983
12984 /*
12985 * Set-up the SAX context
12986 */
12987 if (sax != NULL) {
12988 if (ctxt->sax != NULL)
12989 xmlFree(ctxt->sax);
12990 ctxt->sax = sax;
12991 ctxt->userData = ctxt;
12992 }
12993
12994 /*
12995 * Canonicalise the system ID
12996 */
12997 systemIdCanonic = xmlCanonicPath(SystemID);
12998 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12999 xmlFreeParserCtxt(ctxt);
13000 return(NULL);
13001 }
13002
13003 /*
13004 * Ask the Entity resolver to load the damn thing
13005 */
13006
13007 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
13008 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
13009 systemIdCanonic);
13010 if (input == NULL) {
13011 if (sax != NULL) ctxt->sax = NULL;
13012 xmlFreeParserCtxt(ctxt);
13013 if (systemIdCanonic != NULL)
13014 xmlFree(systemIdCanonic);
13015 return(NULL);
13016 }
13017
13018 /*
13019 * plug some encoding conversion routines here.
13020 */
13021 if (xmlPushInput(ctxt, input) < 0) {
13022 if (sax != NULL) ctxt->sax = NULL;
13023 xmlFreeParserCtxt(ctxt);
13024 if (systemIdCanonic != NULL)
13025 xmlFree(systemIdCanonic);
13026 return(NULL);
13027 }
13028 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13029 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
13030 xmlSwitchEncoding(ctxt, enc);
13031 }
13032
13033 if (input->filename == NULL)
13034 input->filename = (char *) systemIdCanonic;
13035 else
13036 xmlFree(systemIdCanonic);
13037 input->line = 1;
13038 input->col = 1;
13039 input->base = ctxt->input->cur;
13040 input->cur = ctxt->input->cur;
13041 input->free = NULL;
13042
13043 /*
13044 * let's parse that entity knowing it's an external subset.
13045 */
13046 ctxt->inSubset = 2;
13047 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
13048 if (ctxt->myDoc == NULL) {
13049 xmlErrMemory(ctxt, "New Doc failed");
13050 if (sax != NULL) ctxt->sax = NULL;
13051 xmlFreeParserCtxt(ctxt);
13052 return(NULL);
13053 }
13054 ctxt->myDoc->properties = XML_DOC_INTERNAL;
13055 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13056 ExternalID, SystemID);
13057 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13058
13059 if (ctxt->myDoc != NULL) {
13060 if (ctxt->wellFormed) {
13061 ret = ctxt->myDoc->extSubset;
13062 ctxt->myDoc->extSubset = NULL;
13063 if (ret != NULL) {
13064 xmlNodePtr tmp;
13065
13066 ret->doc = NULL;
13067 tmp = ret->children;
13068 while (tmp != NULL) {
13069 tmp->doc = NULL;
13070 tmp = tmp->next;
13071 }
13072 }
13073 } else {
13074 ret = NULL;
13075 }
13076 xmlFreeDoc(ctxt->myDoc);
13077 ctxt->myDoc = NULL;
13078 }
13079 if (sax != NULL) ctxt->sax = NULL;
13080 xmlFreeParserCtxt(ctxt);
13081
13082 return(ret);
13083 }
13084
13085
13086 /**
13087 * xmlParseDTD:
13088 * @ExternalID: a NAME* containing the External ID of the DTD
13089 * @SystemID: a NAME* containing the URL to the DTD
13090 *
13091 * Load and parse an external subset.
13092 *
13093 * Returns the resulting xmlDtdPtr or NULL in case of error.
13094 */
13095
13096 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)13097 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13098 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13099 }
13100 #endif /* LIBXML_VALID_ENABLED */
13101
13102 /************************************************************************
13103 * *
13104 * Front ends when parsing an Entity *
13105 * *
13106 ************************************************************************/
13107
13108 /**
13109 * xmlParseCtxtExternalEntity:
13110 * @ctx: the existing parsing context
13111 * @URL: the URL for the entity to load
13112 * @ID: the System ID for the entity to load
13113 * @lst: the return value for the set of parsed nodes
13114 *
13115 * Parse an external general entity within an existing parsing context
13116 * An external general parsed entity is well-formed if it matches the
13117 * production labeled extParsedEnt.
13118 *
13119 * [78] extParsedEnt ::= TextDecl? content
13120 *
13121 * Returns 0 if the entity is well formed, -1 in case of args problem and
13122 * the parser error code otherwise
13123 */
13124
13125 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13126 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13127 const xmlChar *ID, xmlNodePtr *lst) {
13128 xmlParserCtxtPtr ctxt;
13129 xmlDocPtr newDoc;
13130 xmlNodePtr newRoot;
13131 xmlSAXHandlerPtr oldsax = NULL;
13132 int ret = 0;
13133 xmlChar start[4];
13134 xmlCharEncoding enc;
13135
13136 if (ctx == NULL) return(-1);
13137
13138 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13139 (ctx->depth > 1024)) {
13140 return(XML_ERR_ENTITY_LOOP);
13141 }
13142
13143 if (lst != NULL)
13144 *lst = NULL;
13145 if ((URL == NULL) && (ID == NULL))
13146 return(-1);
13147 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13148 return(-1);
13149
13150 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13151 if (ctxt == NULL) {
13152 return(-1);
13153 }
13154
13155 oldsax = ctxt->sax;
13156 ctxt->sax = ctx->sax;
13157 xmlDetectSAX2(ctxt);
13158 newDoc = xmlNewDoc(BAD_CAST "1.0");
13159 if (newDoc == NULL) {
13160 xmlFreeParserCtxt(ctxt);
13161 return(-1);
13162 }
13163 newDoc->properties = XML_DOC_INTERNAL;
13164 if (ctx->myDoc->dict) {
13165 newDoc->dict = ctx->myDoc->dict;
13166 xmlDictReference(newDoc->dict);
13167 }
13168 if (ctx->myDoc != NULL) {
13169 newDoc->intSubset = ctx->myDoc->intSubset;
13170 newDoc->extSubset = ctx->myDoc->extSubset;
13171 }
13172 if (ctx->myDoc->URL != NULL) {
13173 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13174 }
13175 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13176 if (newRoot == NULL) {
13177 ctxt->sax = oldsax;
13178 xmlFreeParserCtxt(ctxt);
13179 newDoc->intSubset = NULL;
13180 newDoc->extSubset = NULL;
13181 xmlFreeDoc(newDoc);
13182 return(-1);
13183 }
13184 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13185 nodePush(ctxt, newDoc->children);
13186 if (ctx->myDoc == NULL) {
13187 ctxt->myDoc = newDoc;
13188 } else {
13189 ctxt->myDoc = ctx->myDoc;
13190 newDoc->children->doc = ctx->myDoc;
13191 }
13192
13193 /*
13194 * Get the 4 first bytes and decode the charset
13195 * if enc != XML_CHAR_ENCODING_NONE
13196 * plug some encoding conversion routines.
13197 */
13198 GROW
13199 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13200 start[0] = RAW;
13201 start[1] = NXT(1);
13202 start[2] = NXT(2);
13203 start[3] = NXT(3);
13204 enc = xmlDetectCharEncoding(start, 4);
13205 if (enc != XML_CHAR_ENCODING_NONE) {
13206 xmlSwitchEncoding(ctxt, enc);
13207 }
13208 }
13209
13210 /*
13211 * Parse a possible text declaration first
13212 */
13213 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13214 xmlParseTextDecl(ctxt);
13215 /*
13216 * An XML-1.0 document can't reference an entity not XML-1.0
13217 */
13218 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13219 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13220 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13221 "Version mismatch between document and entity\n");
13222 }
13223 }
13224
13225 /*
13226 * If the user provided its own SAX callbacks then reuse the
13227 * useData callback field, otherwise the expected setup in a
13228 * DOM builder is to have userData == ctxt
13229 */
13230 if (ctx->userData == ctx)
13231 ctxt->userData = ctxt;
13232 else
13233 ctxt->userData = ctx->userData;
13234
13235 /*
13236 * Doing validity checking on chunk doesn't make sense
13237 */
13238 ctxt->instate = XML_PARSER_CONTENT;
13239 ctxt->validate = ctx->validate;
13240 ctxt->valid = ctx->valid;
13241 ctxt->loadsubset = ctx->loadsubset;
13242 ctxt->depth = ctx->depth + 1;
13243 ctxt->replaceEntities = ctx->replaceEntities;
13244 if (ctxt->validate) {
13245 ctxt->vctxt.error = ctx->vctxt.error;
13246 ctxt->vctxt.warning = ctx->vctxt.warning;
13247 } else {
13248 ctxt->vctxt.error = NULL;
13249 ctxt->vctxt.warning = NULL;
13250 }
13251 ctxt->vctxt.nodeTab = NULL;
13252 ctxt->vctxt.nodeNr = 0;
13253 ctxt->vctxt.nodeMax = 0;
13254 ctxt->vctxt.node = NULL;
13255 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13256 ctxt->dict = ctx->dict;
13257 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13258 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13259 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13260 ctxt->dictNames = ctx->dictNames;
13261 ctxt->attsDefault = ctx->attsDefault;
13262 ctxt->attsSpecial = ctx->attsSpecial;
13263 ctxt->linenumbers = ctx->linenumbers;
13264
13265 xmlParseContent(ctxt);
13266
13267 ctx->validate = ctxt->validate;
13268 ctx->valid = ctxt->valid;
13269 if ((RAW == '<') && (NXT(1) == '/')) {
13270 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13271 } else if (RAW != 0) {
13272 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13273 }
13274 if (ctxt->node != newDoc->children) {
13275 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13276 }
13277
13278 if (!ctxt->wellFormed) {
13279 if (ctxt->errNo == 0)
13280 ret = 1;
13281 else
13282 ret = ctxt->errNo;
13283 } else {
13284 if (lst != NULL) {
13285 xmlNodePtr cur;
13286
13287 /*
13288 * Return the newly created nodeset after unlinking it from
13289 * they pseudo parent.
13290 */
13291 cur = newDoc->children->children;
13292 *lst = cur;
13293 while (cur != NULL) {
13294 cur->parent = NULL;
13295 cur = cur->next;
13296 }
13297 newDoc->children->children = NULL;
13298 }
13299 ret = 0;
13300 }
13301 ctxt->sax = oldsax;
13302 ctxt->dict = NULL;
13303 ctxt->attsDefault = NULL;
13304 ctxt->attsSpecial = NULL;
13305 xmlFreeParserCtxt(ctxt);
13306 newDoc->intSubset = NULL;
13307 newDoc->extSubset = NULL;
13308 xmlFreeDoc(newDoc);
13309
13310 return(ret);
13311 }
13312
13313 /**
13314 * xmlParseExternalEntityPrivate:
13315 * @doc: the document the chunk pertains to
13316 * @oldctxt: the previous parser context if available
13317 * @sax: the SAX handler bloc (possibly NULL)
13318 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13319 * @depth: Used for loop detection, use 0
13320 * @URL: the URL for the entity to load
13321 * @ID: the System ID for the entity to load
13322 * @list: the return value for the set of parsed nodes
13323 *
13324 * Private version of xmlParseExternalEntity()
13325 *
13326 * Returns 0 if the entity is well formed, -1 in case of args problem and
13327 * the parser error code otherwise
13328 */
13329
13330 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13331 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13332 xmlSAXHandlerPtr sax,
13333 void *user_data, int depth, const xmlChar *URL,
13334 const xmlChar *ID, xmlNodePtr *list) {
13335 xmlParserCtxtPtr ctxt;
13336 xmlDocPtr newDoc;
13337 xmlNodePtr newRoot;
13338 xmlSAXHandlerPtr oldsax = NULL;
13339 xmlParserErrors ret = XML_ERR_OK;
13340 xmlChar start[4];
13341 xmlCharEncoding enc;
13342
13343 if (((depth > 40) &&
13344 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13345 (depth > 1024)) {
13346 return(XML_ERR_ENTITY_LOOP);
13347 }
13348
13349 if (list != NULL)
13350 *list = NULL;
13351 if ((URL == NULL) && (ID == NULL))
13352 return(XML_ERR_INTERNAL_ERROR);
13353 if (doc == NULL)
13354 return(XML_ERR_INTERNAL_ERROR);
13355
13356
13357 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13358 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13359 ctxt->userData = ctxt;
13360 if (oldctxt != NULL) {
13361 ctxt->_private = oldctxt->_private;
13362 ctxt->loadsubset = oldctxt->loadsubset;
13363 ctxt->validate = oldctxt->validate;
13364 ctxt->external = oldctxt->external;
13365 ctxt->record_info = oldctxt->record_info;
13366 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13367 ctxt->node_seq.length = oldctxt->node_seq.length;
13368 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13369 } else {
13370 /*
13371 * Doing validity checking on chunk without context
13372 * doesn't make sense
13373 */
13374 ctxt->_private = NULL;
13375 ctxt->validate = 0;
13376 ctxt->external = 2;
13377 ctxt->loadsubset = 0;
13378 }
13379 if (sax != NULL) {
13380 oldsax = ctxt->sax;
13381 ctxt->sax = sax;
13382 if (user_data != NULL)
13383 ctxt->userData = user_data;
13384 }
13385 xmlDetectSAX2(ctxt);
13386 newDoc = xmlNewDoc(BAD_CAST "1.0");
13387 if (newDoc == NULL) {
13388 ctxt->node_seq.maximum = 0;
13389 ctxt->node_seq.length = 0;
13390 ctxt->node_seq.buffer = NULL;
13391 xmlFreeParserCtxt(ctxt);
13392 return(XML_ERR_INTERNAL_ERROR);
13393 }
13394 newDoc->properties = XML_DOC_INTERNAL;
13395 newDoc->intSubset = doc->intSubset;
13396 newDoc->extSubset = doc->extSubset;
13397 newDoc->dict = doc->dict;
13398 xmlDictReference(newDoc->dict);
13399
13400 if (doc->URL != NULL) {
13401 newDoc->URL = xmlStrdup(doc->URL);
13402 }
13403 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13404 if (newRoot == NULL) {
13405 if (sax != NULL)
13406 ctxt->sax = oldsax;
13407 ctxt->node_seq.maximum = 0;
13408 ctxt->node_seq.length = 0;
13409 ctxt->node_seq.buffer = NULL;
13410 xmlFreeParserCtxt(ctxt);
13411 newDoc->intSubset = NULL;
13412 newDoc->extSubset = NULL;
13413 xmlFreeDoc(newDoc);
13414 return(XML_ERR_INTERNAL_ERROR);
13415 }
13416 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13417 nodePush(ctxt, newDoc->children);
13418 ctxt->myDoc = doc;
13419 newRoot->doc = doc;
13420
13421 /*
13422 * Get the 4 first bytes and decode the charset
13423 * if enc != XML_CHAR_ENCODING_NONE
13424 * plug some encoding conversion routines.
13425 */
13426 GROW;
13427 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13428 start[0] = RAW;
13429 start[1] = NXT(1);
13430 start[2] = NXT(2);
13431 start[3] = NXT(3);
13432 enc = xmlDetectCharEncoding(start, 4);
13433 if (enc != XML_CHAR_ENCODING_NONE) {
13434 xmlSwitchEncoding(ctxt, enc);
13435 }
13436 }
13437
13438 /*
13439 * Parse a possible text declaration first
13440 */
13441 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13442 xmlParseTextDecl(ctxt);
13443 }
13444
13445 ctxt->instate = XML_PARSER_CONTENT;
13446 ctxt->depth = depth;
13447
13448 xmlParseContent(ctxt);
13449
13450 if ((RAW == '<') && (NXT(1) == '/')) {
13451 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13452 } else if (RAW != 0) {
13453 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13454 }
13455 if (ctxt->node != newDoc->children) {
13456 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13457 }
13458
13459 if (!ctxt->wellFormed) {
13460 if (ctxt->errNo == 0)
13461 ret = XML_ERR_INTERNAL_ERROR;
13462 else
13463 ret = (xmlParserErrors)ctxt->errNo;
13464 } else {
13465 if (list != NULL) {
13466 xmlNodePtr cur;
13467
13468 /*
13469 * Return the newly created nodeset after unlinking it from
13470 * they pseudo parent.
13471 */
13472 cur = newDoc->children->children;
13473 *list = cur;
13474 while (cur != NULL) {
13475 cur->parent = NULL;
13476 cur = cur->next;
13477 }
13478 newDoc->children->children = NULL;
13479 }
13480 ret = XML_ERR_OK;
13481 }
13482
13483 /*
13484 * Record in the parent context the number of entities replacement
13485 * done when parsing that reference.
13486 */
13487 if (oldctxt != NULL)
13488 oldctxt->nbentities += ctxt->nbentities;
13489
13490 /*
13491 * Also record the size of the entity parsed
13492 */
13493 if (ctxt->input != NULL && oldctxt != NULL) {
13494 oldctxt->sizeentities += ctxt->input->consumed;
13495 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13496 }
13497 /*
13498 * And record the last error if any
13499 */
13500 if (ctxt->lastError.code != XML_ERR_OK)
13501 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13502
13503 if (sax != NULL)
13504 ctxt->sax = oldsax;
13505 if (oldctxt != NULL) {
13506 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13507 oldctxt->node_seq.length = ctxt->node_seq.length;
13508 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13509 }
13510 ctxt->node_seq.maximum = 0;
13511 ctxt->node_seq.length = 0;
13512 ctxt->node_seq.buffer = NULL;
13513 xmlFreeParserCtxt(ctxt);
13514 newDoc->intSubset = NULL;
13515 newDoc->extSubset = NULL;
13516 xmlFreeDoc(newDoc);
13517
13518 return(ret);
13519 }
13520
13521 #ifdef LIBXML_SAX1_ENABLED
13522 /**
13523 * xmlParseExternalEntity:
13524 * @doc: the document the chunk pertains to
13525 * @sax: the SAX handler bloc (possibly NULL)
13526 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13527 * @depth: Used for loop detection, use 0
13528 * @URL: the URL for the entity to load
13529 * @ID: the System ID for the entity to load
13530 * @lst: the return value for the set of parsed nodes
13531 *
13532 * Parse an external general entity
13533 * An external general parsed entity is well-formed if it matches the
13534 * production labeled extParsedEnt.
13535 *
13536 * [78] extParsedEnt ::= TextDecl? content
13537 *
13538 * Returns 0 if the entity is well formed, -1 in case of args problem and
13539 * the parser error code otherwise
13540 */
13541
13542 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13543 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13544 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13545 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13546 ID, lst));
13547 }
13548
13549 /**
13550 * xmlParseBalancedChunkMemory:
13551 * @doc: the document the chunk pertains to
13552 * @sax: the SAX handler bloc (possibly NULL)
13553 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13554 * @depth: Used for loop detection, use 0
13555 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13556 * @lst: the return value for the set of parsed nodes
13557 *
13558 * Parse a well-balanced chunk of an XML document
13559 * called by the parser
13560 * The allowed sequence for the Well Balanced Chunk is the one defined by
13561 * the content production in the XML grammar:
13562 *
13563 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13564 *
13565 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13566 * the parser error code otherwise
13567 */
13568
13569 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13570 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13571 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13572 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13573 depth, string, lst, 0 );
13574 }
13575 #endif /* LIBXML_SAX1_ENABLED */
13576
13577 /**
13578 * xmlParseBalancedChunkMemoryInternal:
13579 * @oldctxt: the existing parsing context
13580 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13581 * @user_data: the user data field for the parser context
13582 * @lst: the return value for the set of parsed nodes
13583 *
13584 *
13585 * Parse a well-balanced chunk of an XML document
13586 * called by the parser
13587 * The allowed sequence for the Well Balanced Chunk is the one defined by
13588 * the content production in the XML grammar:
13589 *
13590 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13591 *
13592 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13593 * error code otherwise
13594 *
13595 * In case recover is set to 1, the nodelist will not be empty even if
13596 * the parsed chunk is not well balanced.
13597 */
13598 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13599 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13600 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13601 xmlParserCtxtPtr ctxt;
13602 xmlDocPtr newDoc = NULL;
13603 xmlNodePtr newRoot;
13604 xmlSAXHandlerPtr oldsax = NULL;
13605 xmlNodePtr content = NULL;
13606 xmlNodePtr last = NULL;
13607 int size;
13608 xmlParserErrors ret = XML_ERR_OK;
13609 #ifdef SAX2
13610 int i;
13611 #endif
13612
13613 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13614 (oldctxt->depth > 1024)) {
13615 return(XML_ERR_ENTITY_LOOP);
13616 }
13617
13618
13619 if (lst != NULL)
13620 *lst = NULL;
13621 if (string == NULL)
13622 return(XML_ERR_INTERNAL_ERROR);
13623
13624 size = xmlStrlen(string);
13625
13626 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13627 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13628 if (user_data != NULL)
13629 ctxt->userData = user_data;
13630 else
13631 ctxt->userData = ctxt;
13632 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13633 ctxt->dict = oldctxt->dict;
13634 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13635 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13636 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13637
13638 #ifdef SAX2
13639 /* propagate namespaces down the entity */
13640 for (i = 0;i < oldctxt->nsNr;i += 2) {
13641 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13642 }
13643 #endif
13644
13645 oldsax = ctxt->sax;
13646 ctxt->sax = oldctxt->sax;
13647 xmlDetectSAX2(ctxt);
13648 ctxt->replaceEntities = oldctxt->replaceEntities;
13649 ctxt->options = oldctxt->options;
13650
13651 ctxt->_private = oldctxt->_private;
13652 if (oldctxt->myDoc == NULL) {
13653 newDoc = xmlNewDoc(BAD_CAST "1.0");
13654 if (newDoc == NULL) {
13655 ctxt->sax = oldsax;
13656 ctxt->dict = NULL;
13657 xmlFreeParserCtxt(ctxt);
13658 return(XML_ERR_INTERNAL_ERROR);
13659 }
13660 newDoc->properties = XML_DOC_INTERNAL;
13661 newDoc->dict = ctxt->dict;
13662 xmlDictReference(newDoc->dict);
13663 ctxt->myDoc = newDoc;
13664 } else {
13665 ctxt->myDoc = oldctxt->myDoc;
13666 content = ctxt->myDoc->children;
13667 last = ctxt->myDoc->last;
13668 }
13669 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13670 if (newRoot == NULL) {
13671 ctxt->sax = oldsax;
13672 ctxt->dict = NULL;
13673 xmlFreeParserCtxt(ctxt);
13674 if (newDoc != NULL) {
13675 xmlFreeDoc(newDoc);
13676 }
13677 return(XML_ERR_INTERNAL_ERROR);
13678 }
13679 ctxt->myDoc->children = NULL;
13680 ctxt->myDoc->last = NULL;
13681 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13682 nodePush(ctxt, ctxt->myDoc->children);
13683 ctxt->instate = XML_PARSER_CONTENT;
13684 ctxt->depth = oldctxt->depth + 1;
13685
13686 ctxt->validate = 0;
13687 ctxt->loadsubset = oldctxt->loadsubset;
13688 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13689 /*
13690 * ID/IDREF registration will be done in xmlValidateElement below
13691 */
13692 ctxt->loadsubset |= XML_SKIP_IDS;
13693 }
13694 ctxt->dictNames = oldctxt->dictNames;
13695 ctxt->attsDefault = oldctxt->attsDefault;
13696 ctxt->attsSpecial = oldctxt->attsSpecial;
13697
13698 xmlParseContent(ctxt);
13699 if ((RAW == '<') && (NXT(1) == '/')) {
13700 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13701 } else if (RAW != 0) {
13702 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13703 }
13704 if (ctxt->node != ctxt->myDoc->children) {
13705 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13706 }
13707
13708 if (!ctxt->wellFormed) {
13709 if (ctxt->errNo == 0)
13710 ret = XML_ERR_INTERNAL_ERROR;
13711 else
13712 ret = (xmlParserErrors)ctxt->errNo;
13713 } else {
13714 ret = XML_ERR_OK;
13715 }
13716
13717 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13718 xmlNodePtr cur;
13719
13720 /*
13721 * Return the newly created nodeset after unlinking it from
13722 * they pseudo parent.
13723 */
13724 cur = ctxt->myDoc->children->children;
13725 *lst = cur;
13726 while (cur != NULL) {
13727 #ifdef LIBXML_VALID_ENABLED
13728 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13729 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13730 (cur->type == XML_ELEMENT_NODE)) {
13731 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13732 oldctxt->myDoc, cur);
13733 }
13734 #endif /* LIBXML_VALID_ENABLED */
13735 cur->parent = NULL;
13736 cur = cur->next;
13737 }
13738 ctxt->myDoc->children->children = NULL;
13739 }
13740 if (ctxt->myDoc != NULL) {
13741 xmlFreeNode(ctxt->myDoc->children);
13742 ctxt->myDoc->children = content;
13743 ctxt->myDoc->last = last;
13744 }
13745
13746 /*
13747 * Record in the parent context the number of entities replacement
13748 * done when parsing that reference.
13749 */
13750 if (oldctxt != NULL)
13751 oldctxt->nbentities += ctxt->nbentities;
13752
13753 /*
13754 * Also record the last error if any
13755 */
13756 if (ctxt->lastError.code != XML_ERR_OK)
13757 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13758
13759 ctxt->sax = oldsax;
13760 ctxt->dict = NULL;
13761 ctxt->attsDefault = NULL;
13762 ctxt->attsSpecial = NULL;
13763 xmlFreeParserCtxt(ctxt);
13764 if (newDoc != NULL) {
13765 xmlFreeDoc(newDoc);
13766 }
13767
13768 return(ret);
13769 }
13770
13771 /**
13772 * xmlParseInNodeContext:
13773 * @node: the context node
13774 * @data: the input string
13775 * @datalen: the input string length in bytes
13776 * @options: a combination of xmlParserOption
13777 * @lst: the return value for the set of parsed nodes
13778 *
13779 * Parse a well-balanced chunk of an XML document
13780 * within the context (DTD, namespaces, etc ...) of the given node.
13781 *
13782 * The allowed sequence for the data is a Well Balanced Chunk defined by
13783 * the content production in the XML grammar:
13784 *
13785 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13786 *
13787 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13788 * error code otherwise
13789 */
13790 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13791 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13792 int options, xmlNodePtr *lst) {
13793 #ifdef SAX2
13794 xmlParserCtxtPtr ctxt;
13795 xmlDocPtr doc = NULL;
13796 xmlNodePtr fake, cur;
13797 int nsnr = 0;
13798
13799 xmlParserErrors ret = XML_ERR_OK;
13800
13801 /*
13802 * check all input parameters, grab the document
13803 */
13804 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13805 return(XML_ERR_INTERNAL_ERROR);
13806 switch (node->type) {
13807 case XML_ELEMENT_NODE:
13808 case XML_ATTRIBUTE_NODE:
13809 case XML_TEXT_NODE:
13810 case XML_CDATA_SECTION_NODE:
13811 case XML_ENTITY_REF_NODE:
13812 case XML_PI_NODE:
13813 case XML_COMMENT_NODE:
13814 case XML_DOCUMENT_NODE:
13815 case XML_HTML_DOCUMENT_NODE:
13816 break;
13817 default:
13818 return(XML_ERR_INTERNAL_ERROR);
13819
13820 }
13821 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13822 (node->type != XML_DOCUMENT_NODE) &&
13823 (node->type != XML_HTML_DOCUMENT_NODE))
13824 node = node->parent;
13825 if (node == NULL)
13826 return(XML_ERR_INTERNAL_ERROR);
13827 if (node->type == XML_ELEMENT_NODE)
13828 doc = node->doc;
13829 else
13830 doc = (xmlDocPtr) node;
13831 if (doc == NULL)
13832 return(XML_ERR_INTERNAL_ERROR);
13833
13834 /*
13835 * allocate a context and set-up everything not related to the
13836 * node position in the tree
13837 */
13838 if (doc->type == XML_DOCUMENT_NODE)
13839 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13840 #ifdef LIBXML_HTML_ENABLED
13841 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13842 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13843 /*
13844 * When parsing in context, it makes no sense to add implied
13845 * elements like html/body/etc...
13846 */
13847 options |= HTML_PARSE_NOIMPLIED;
13848 }
13849 #endif
13850 else
13851 return(XML_ERR_INTERNAL_ERROR);
13852
13853 if (ctxt == NULL)
13854 return(XML_ERR_NO_MEMORY);
13855
13856 /*
13857 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13858 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13859 * we must wait until the last moment to free the original one.
13860 */
13861 if (doc->dict != NULL) {
13862 if (ctxt->dict != NULL)
13863 xmlDictFree(ctxt->dict);
13864 ctxt->dict = doc->dict;
13865 } else
13866 options |= XML_PARSE_NODICT;
13867
13868 if (doc->encoding != NULL) {
13869 xmlCharEncodingHandlerPtr hdlr;
13870
13871 if (ctxt->encoding != NULL)
13872 xmlFree((xmlChar *) ctxt->encoding);
13873 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13874
13875 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13876 if (hdlr != NULL) {
13877 xmlSwitchToEncoding(ctxt, hdlr);
13878 } else {
13879 return(XML_ERR_UNSUPPORTED_ENCODING);
13880 }
13881 }
13882
13883 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13884 xmlDetectSAX2(ctxt);
13885 ctxt->myDoc = doc;
13886 /* parsing in context, i.e. as within existing content */
13887 ctxt->instate = XML_PARSER_CONTENT;
13888
13889 fake = xmlNewComment(NULL);
13890 if (fake == NULL) {
13891 xmlFreeParserCtxt(ctxt);
13892 return(XML_ERR_NO_MEMORY);
13893 }
13894 xmlAddChild(node, fake);
13895
13896 if (node->type == XML_ELEMENT_NODE) {
13897 nodePush(ctxt, node);
13898 /*
13899 * initialize the SAX2 namespaces stack
13900 */
13901 cur = node;
13902 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13903 xmlNsPtr ns = cur->nsDef;
13904 const xmlChar *iprefix, *ihref;
13905
13906 while (ns != NULL) {
13907 if (ctxt->dict) {
13908 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13909 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13910 } else {
13911 iprefix = ns->prefix;
13912 ihref = ns->href;
13913 }
13914
13915 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13916 nsPush(ctxt, iprefix, ihref);
13917 nsnr++;
13918 }
13919 ns = ns->next;
13920 }
13921 cur = cur->parent;
13922 }
13923 }
13924
13925 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13926 /*
13927 * ID/IDREF registration will be done in xmlValidateElement below
13928 */
13929 ctxt->loadsubset |= XML_SKIP_IDS;
13930 }
13931
13932 #ifdef LIBXML_HTML_ENABLED
13933 if (doc->type == XML_HTML_DOCUMENT_NODE)
13934 __htmlParseContent(ctxt);
13935 else
13936 #endif
13937 xmlParseContent(ctxt);
13938
13939 nsPop(ctxt, nsnr);
13940 if ((RAW == '<') && (NXT(1) == '/')) {
13941 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13942 } else if (RAW != 0) {
13943 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13944 }
13945 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13946 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13947 ctxt->wellFormed = 0;
13948 }
13949
13950 if (!ctxt->wellFormed) {
13951 if (ctxt->errNo == 0)
13952 ret = XML_ERR_INTERNAL_ERROR;
13953 else
13954 ret = (xmlParserErrors)ctxt->errNo;
13955 } else {
13956 ret = XML_ERR_OK;
13957 }
13958
13959 /*
13960 * Return the newly created nodeset after unlinking it from
13961 * the pseudo sibling.
13962 */
13963
13964 cur = fake->next;
13965 fake->next = NULL;
13966 node->last = fake;
13967
13968 if (cur != NULL) {
13969 cur->prev = NULL;
13970 }
13971
13972 *lst = cur;
13973
13974 while (cur != NULL) {
13975 cur->parent = NULL;
13976 cur = cur->next;
13977 }
13978
13979 xmlUnlinkNode(fake);
13980 xmlFreeNode(fake);
13981
13982
13983 if (ret != XML_ERR_OK) {
13984 xmlFreeNodeList(*lst);
13985 *lst = NULL;
13986 }
13987
13988 if (doc->dict != NULL)
13989 ctxt->dict = NULL;
13990 xmlFreeParserCtxt(ctxt);
13991
13992 return(ret);
13993 #else /* !SAX2 */
13994 return(XML_ERR_INTERNAL_ERROR);
13995 #endif
13996 }
13997
13998 #ifdef LIBXML_SAX1_ENABLED
13999 /**
14000 * xmlParseBalancedChunkMemoryRecover:
14001 * @doc: the document the chunk pertains to
14002 * @sax: the SAX handler bloc (possibly NULL)
14003 * @user_data: The user data returned on SAX callbacks (possibly NULL)
14004 * @depth: Used for loop detection, use 0
14005 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
14006 * @lst: the return value for the set of parsed nodes
14007 * @recover: return nodes even if the data is broken (use 0)
14008 *
14009 *
14010 * Parse a well-balanced chunk of an XML document
14011 * called by the parser
14012 * The allowed sequence for the Well Balanced Chunk is the one defined by
14013 * the content production in the XML grammar:
14014 *
14015 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
14016 *
14017 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
14018 * the parser error code otherwise
14019 *
14020 * In case recover is set to 1, the nodelist will not be empty even if
14021 * the parsed chunk is not well balanced, assuming the parsing succeeded to
14022 * some extent.
14023 */
14024 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)14025 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
14026 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
14027 int recover) {
14028 xmlParserCtxtPtr ctxt;
14029 xmlDocPtr newDoc;
14030 xmlSAXHandlerPtr oldsax = NULL;
14031 xmlNodePtr content, newRoot;
14032 int size;
14033 int ret = 0;
14034
14035 if (depth > 40) {
14036 return(XML_ERR_ENTITY_LOOP);
14037 }
14038
14039
14040 if (lst != NULL)
14041 *lst = NULL;
14042 if (string == NULL)
14043 return(-1);
14044
14045 size = xmlStrlen(string);
14046
14047 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
14048 if (ctxt == NULL) return(-1);
14049 ctxt->userData = ctxt;
14050 if (sax != NULL) {
14051 oldsax = ctxt->sax;
14052 ctxt->sax = sax;
14053 if (user_data != NULL)
14054 ctxt->userData = user_data;
14055 }
14056 newDoc = xmlNewDoc(BAD_CAST "1.0");
14057 if (newDoc == NULL) {
14058 xmlFreeParserCtxt(ctxt);
14059 return(-1);
14060 }
14061 newDoc->properties = XML_DOC_INTERNAL;
14062 if ((doc != NULL) && (doc->dict != NULL)) {
14063 xmlDictFree(ctxt->dict);
14064 ctxt->dict = doc->dict;
14065 xmlDictReference(ctxt->dict);
14066 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14067 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14068 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14069 ctxt->dictNames = 1;
14070 } else {
14071 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
14072 }
14073 if (doc != NULL) {
14074 newDoc->intSubset = doc->intSubset;
14075 newDoc->extSubset = doc->extSubset;
14076 }
14077 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14078 if (newRoot == NULL) {
14079 if (sax != NULL)
14080 ctxt->sax = oldsax;
14081 xmlFreeParserCtxt(ctxt);
14082 newDoc->intSubset = NULL;
14083 newDoc->extSubset = NULL;
14084 xmlFreeDoc(newDoc);
14085 return(-1);
14086 }
14087 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14088 nodePush(ctxt, newRoot);
14089 if (doc == NULL) {
14090 ctxt->myDoc = newDoc;
14091 } else {
14092 ctxt->myDoc = newDoc;
14093 newDoc->children->doc = doc;
14094 /* Ensure that doc has XML spec namespace */
14095 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14096 newDoc->oldNs = doc->oldNs;
14097 }
14098 ctxt->instate = XML_PARSER_CONTENT;
14099 ctxt->depth = depth;
14100
14101 /*
14102 * Doing validity checking on chunk doesn't make sense
14103 */
14104 ctxt->validate = 0;
14105 ctxt->loadsubset = 0;
14106 xmlDetectSAX2(ctxt);
14107
14108 if ( doc != NULL ){
14109 content = doc->children;
14110 doc->children = NULL;
14111 xmlParseContent(ctxt);
14112 doc->children = content;
14113 }
14114 else {
14115 xmlParseContent(ctxt);
14116 }
14117 if ((RAW == '<') && (NXT(1) == '/')) {
14118 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14119 } else if (RAW != 0) {
14120 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
14121 }
14122 if (ctxt->node != newDoc->children) {
14123 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14124 }
14125
14126 if (!ctxt->wellFormed) {
14127 if (ctxt->errNo == 0)
14128 ret = 1;
14129 else
14130 ret = ctxt->errNo;
14131 } else {
14132 ret = 0;
14133 }
14134
14135 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14136 xmlNodePtr cur;
14137
14138 /*
14139 * Return the newly created nodeset after unlinking it from
14140 * they pseudo parent.
14141 */
14142 cur = newDoc->children->children;
14143 *lst = cur;
14144 while (cur != NULL) {
14145 xmlSetTreeDoc(cur, doc);
14146 cur->parent = NULL;
14147 cur = cur->next;
14148 }
14149 newDoc->children->children = NULL;
14150 }
14151
14152 if (sax != NULL)
14153 ctxt->sax = oldsax;
14154 xmlFreeParserCtxt(ctxt);
14155 newDoc->intSubset = NULL;
14156 newDoc->extSubset = NULL;
14157 newDoc->oldNs = NULL;
14158 xmlFreeDoc(newDoc);
14159
14160 return(ret);
14161 }
14162
14163 /**
14164 * xmlSAXParseEntity:
14165 * @sax: the SAX handler block
14166 * @filename: the filename
14167 *
14168 * parse an XML external entity out of context and build a tree.
14169 * It use the given SAX function block to handle the parsing callback.
14170 * If sax is NULL, fallback to the default DOM tree building routines.
14171 *
14172 * [78] extParsedEnt ::= TextDecl? content
14173 *
14174 * This correspond to a "Well Balanced" chunk
14175 *
14176 * Returns the resulting document tree
14177 */
14178
14179 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)14180 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14181 xmlDocPtr ret;
14182 xmlParserCtxtPtr ctxt;
14183
14184 ctxt = xmlCreateFileParserCtxt(filename);
14185 if (ctxt == NULL) {
14186 return(NULL);
14187 }
14188 if (sax != NULL) {
14189 if (ctxt->sax != NULL)
14190 xmlFree(ctxt->sax);
14191 ctxt->sax = sax;
14192 ctxt->userData = NULL;
14193 }
14194
14195 xmlParseExtParsedEnt(ctxt);
14196
14197 if (ctxt->wellFormed)
14198 ret = ctxt->myDoc;
14199 else {
14200 ret = NULL;
14201 xmlFreeDoc(ctxt->myDoc);
14202 ctxt->myDoc = NULL;
14203 }
14204 if (sax != NULL)
14205 ctxt->sax = NULL;
14206 xmlFreeParserCtxt(ctxt);
14207
14208 return(ret);
14209 }
14210
14211 /**
14212 * xmlParseEntity:
14213 * @filename: the filename
14214 *
14215 * parse an XML external entity out of context and build a tree.
14216 *
14217 * [78] extParsedEnt ::= TextDecl? content
14218 *
14219 * This correspond to a "Well Balanced" chunk
14220 *
14221 * Returns the resulting document tree
14222 */
14223
14224 xmlDocPtr
xmlParseEntity(const char * filename)14225 xmlParseEntity(const char *filename) {
14226 return(xmlSAXParseEntity(NULL, filename));
14227 }
14228 #endif /* LIBXML_SAX1_ENABLED */
14229
14230 /**
14231 * xmlCreateEntityParserCtxtInternal:
14232 * @URL: the entity URL
14233 * @ID: the entity PUBLIC ID
14234 * @base: a possible base for the target URI
14235 * @pctx: parser context used to set options on new context
14236 *
14237 * Create a parser context for an external entity
14238 * Automatic support for ZLIB/Compress compressed document is provided
14239 * by default if found at compile-time.
14240 *
14241 * Returns the new parser context or NULL
14242 */
14243 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14244 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14245 const xmlChar *base, xmlParserCtxtPtr pctx) {
14246 xmlParserCtxtPtr ctxt;
14247 xmlParserInputPtr inputStream;
14248 char *directory = NULL;
14249 xmlChar *uri;
14250
14251 ctxt = xmlNewParserCtxt();
14252 if (ctxt == NULL) {
14253 return(NULL);
14254 }
14255
14256 if (pctx != NULL) {
14257 ctxt->options = pctx->options;
14258 ctxt->_private = pctx->_private;
14259 }
14260
14261 uri = xmlBuildURI(URL, base);
14262
14263 if (uri == NULL) {
14264 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14265 if (inputStream == NULL) {
14266 xmlFreeParserCtxt(ctxt);
14267 return(NULL);
14268 }
14269
14270 inputPush(ctxt, inputStream);
14271
14272 if ((ctxt->directory == NULL) && (directory == NULL))
14273 directory = xmlParserGetDirectory((char *)URL);
14274 if ((ctxt->directory == NULL) && (directory != NULL))
14275 ctxt->directory = directory;
14276 } else {
14277 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14278 if (inputStream == NULL) {
14279 xmlFree(uri);
14280 xmlFreeParserCtxt(ctxt);
14281 return(NULL);
14282 }
14283
14284 inputPush(ctxt, inputStream);
14285
14286 if ((ctxt->directory == NULL) && (directory == NULL))
14287 directory = xmlParserGetDirectory((char *)uri);
14288 if ((ctxt->directory == NULL) && (directory != NULL))
14289 ctxt->directory = directory;
14290 xmlFree(uri);
14291 }
14292 return(ctxt);
14293 }
14294
14295 /**
14296 * xmlCreateEntityParserCtxt:
14297 * @URL: the entity URL
14298 * @ID: the entity PUBLIC ID
14299 * @base: a possible base for the target URI
14300 *
14301 * Create a parser context for an external entity
14302 * Automatic support for ZLIB/Compress compressed document is provided
14303 * by default if found at compile-time.
14304 *
14305 * Returns the new parser context or NULL
14306 */
14307 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14308 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14309 const xmlChar *base) {
14310 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14311
14312 }
14313
14314 /************************************************************************
14315 * *
14316 * Front ends when parsing from a file *
14317 * *
14318 ************************************************************************/
14319
14320 /**
14321 * xmlCreateURLParserCtxt:
14322 * @filename: the filename or URL
14323 * @options: a combination of xmlParserOption
14324 *
14325 * Create a parser context for a file or URL content.
14326 * Automatic support for ZLIB/Compress compressed document is provided
14327 * by default if found at compile-time and for file accesses
14328 *
14329 * Returns the new parser context or NULL
14330 */
14331 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14332 xmlCreateURLParserCtxt(const char *filename, int options)
14333 {
14334 xmlParserCtxtPtr ctxt;
14335 xmlParserInputPtr inputStream;
14336 char *directory = NULL;
14337
14338 ctxt = xmlNewParserCtxt();
14339 if (ctxt == NULL) {
14340 xmlErrMemory(NULL, "cannot allocate parser context");
14341 return(NULL);
14342 }
14343
14344 if (options)
14345 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14346 ctxt->linenumbers = 1;
14347
14348 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14349 if (inputStream == NULL) {
14350 xmlFreeParserCtxt(ctxt);
14351 return(NULL);
14352 }
14353
14354 inputPush(ctxt, inputStream);
14355 if ((ctxt->directory == NULL) && (directory == NULL))
14356 directory = xmlParserGetDirectory(filename);
14357 if ((ctxt->directory == NULL) && (directory != NULL))
14358 ctxt->directory = directory;
14359
14360 return(ctxt);
14361 }
14362
14363 /**
14364 * xmlCreateFileParserCtxt:
14365 * @filename: the filename
14366 *
14367 * Create a parser context for a file content.
14368 * Automatic support for ZLIB/Compress compressed document is provided
14369 * by default if found at compile-time.
14370 *
14371 * Returns the new parser context or NULL
14372 */
14373 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14374 xmlCreateFileParserCtxt(const char *filename)
14375 {
14376 return(xmlCreateURLParserCtxt(filename, 0));
14377 }
14378
14379 #ifdef LIBXML_SAX1_ENABLED
14380 /**
14381 * xmlSAXParseFileWithData:
14382 * @sax: the SAX handler block
14383 * @filename: the filename
14384 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14385 * documents
14386 * @data: the userdata
14387 *
14388 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14389 * compressed document is provided by default if found at compile-time.
14390 * It use the given SAX function block to handle the parsing callback.
14391 * If sax is NULL, fallback to the default DOM tree building routines.
14392 *
14393 * User data (void *) is stored within the parser context in the
14394 * context's _private member, so it is available nearly everywhere in libxml
14395 *
14396 * Returns the resulting document tree
14397 */
14398
14399 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14400 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14401 int recovery, void *data) {
14402 xmlDocPtr ret;
14403 xmlParserCtxtPtr ctxt;
14404
14405 xmlInitParser();
14406
14407 ctxt = xmlCreateFileParserCtxt(filename);
14408 if (ctxt == NULL) {
14409 return(NULL);
14410 }
14411 if (sax != NULL) {
14412 if (ctxt->sax != NULL)
14413 xmlFree(ctxt->sax);
14414 ctxt->sax = sax;
14415 }
14416 xmlDetectSAX2(ctxt);
14417 if (data!=NULL) {
14418 ctxt->_private = data;
14419 }
14420
14421 if (ctxt->directory == NULL)
14422 ctxt->directory = xmlParserGetDirectory(filename);
14423
14424 ctxt->recovery = recovery;
14425
14426 xmlParseDocument(ctxt);
14427
14428 if ((ctxt->wellFormed) || recovery) {
14429 ret = ctxt->myDoc;
14430 if (ret != NULL) {
14431 if (ctxt->input->buf->compressed > 0)
14432 ret->compression = 9;
14433 else
14434 ret->compression = ctxt->input->buf->compressed;
14435 }
14436 }
14437 else {
14438 ret = NULL;
14439 xmlFreeDoc(ctxt->myDoc);
14440 ctxt->myDoc = NULL;
14441 }
14442 if (sax != NULL)
14443 ctxt->sax = NULL;
14444 xmlFreeParserCtxt(ctxt);
14445
14446 return(ret);
14447 }
14448
14449 /**
14450 * xmlSAXParseFile:
14451 * @sax: the SAX handler block
14452 * @filename: the filename
14453 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14454 * documents
14455 *
14456 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14457 * compressed document is provided by default if found at compile-time.
14458 * It use the given SAX function block to handle the parsing callback.
14459 * If sax is NULL, fallback to the default DOM tree building routines.
14460 *
14461 * Returns the resulting document tree
14462 */
14463
14464 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14465 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14466 int recovery) {
14467 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14468 }
14469
14470 /**
14471 * xmlRecoverDoc:
14472 * @cur: a pointer to an array of xmlChar
14473 *
14474 * parse an XML in-memory document and build a tree.
14475 * In the case the document is not Well Formed, a attempt to build a
14476 * tree is tried anyway
14477 *
14478 * Returns the resulting document tree or NULL in case of failure
14479 */
14480
14481 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14482 xmlRecoverDoc(const xmlChar *cur) {
14483 return(xmlSAXParseDoc(NULL, cur, 1));
14484 }
14485
14486 /**
14487 * xmlParseFile:
14488 * @filename: the filename
14489 *
14490 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14491 * compressed document is provided by default if found at compile-time.
14492 *
14493 * Returns the resulting document tree if the file was wellformed,
14494 * NULL otherwise.
14495 */
14496
14497 xmlDocPtr
xmlParseFile(const char * filename)14498 xmlParseFile(const char *filename) {
14499 return(xmlSAXParseFile(NULL, filename, 0));
14500 }
14501
14502 /**
14503 * xmlRecoverFile:
14504 * @filename: the filename
14505 *
14506 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14507 * compressed document is provided by default if found at compile-time.
14508 * In the case the document is not Well Formed, it attempts to build
14509 * a tree anyway
14510 *
14511 * Returns the resulting document tree or NULL in case of failure
14512 */
14513
14514 xmlDocPtr
xmlRecoverFile(const char * filename)14515 xmlRecoverFile(const char *filename) {
14516 return(xmlSAXParseFile(NULL, filename, 1));
14517 }
14518
14519
14520 /**
14521 * xmlSetupParserForBuffer:
14522 * @ctxt: an XML parser context
14523 * @buffer: a xmlChar * buffer
14524 * @filename: a file name
14525 *
14526 * Setup the parser context to parse a new buffer; Clears any prior
14527 * contents from the parser context. The buffer parameter must not be
14528 * NULL, but the filename parameter can be
14529 */
14530 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14531 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14532 const char* filename)
14533 {
14534 xmlParserInputPtr input;
14535
14536 if ((ctxt == NULL) || (buffer == NULL))
14537 return;
14538
14539 input = xmlNewInputStream(ctxt);
14540 if (input == NULL) {
14541 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14542 xmlClearParserCtxt(ctxt);
14543 return;
14544 }
14545
14546 xmlClearParserCtxt(ctxt);
14547 if (filename != NULL)
14548 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14549 input->base = buffer;
14550 input->cur = buffer;
14551 input->end = &buffer[xmlStrlen(buffer)];
14552 inputPush(ctxt, input);
14553 }
14554
14555 /**
14556 * xmlSAXUserParseFile:
14557 * @sax: a SAX handler
14558 * @user_data: The user data returned on SAX callbacks
14559 * @filename: a file name
14560 *
14561 * parse an XML file and call the given SAX handler routines.
14562 * Automatic support for ZLIB/Compress compressed document is provided
14563 *
14564 * Returns 0 in case of success or a error number otherwise
14565 */
14566 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14567 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14568 const char *filename) {
14569 int ret = 0;
14570 xmlParserCtxtPtr ctxt;
14571
14572 ctxt = xmlCreateFileParserCtxt(filename);
14573 if (ctxt == NULL) return -1;
14574 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14575 xmlFree(ctxt->sax);
14576 ctxt->sax = sax;
14577 xmlDetectSAX2(ctxt);
14578
14579 if (user_data != NULL)
14580 ctxt->userData = user_data;
14581
14582 xmlParseDocument(ctxt);
14583
14584 if (ctxt->wellFormed)
14585 ret = 0;
14586 else {
14587 if (ctxt->errNo != 0)
14588 ret = ctxt->errNo;
14589 else
14590 ret = -1;
14591 }
14592 if (sax != NULL)
14593 ctxt->sax = NULL;
14594 if (ctxt->myDoc != NULL) {
14595 xmlFreeDoc(ctxt->myDoc);
14596 ctxt->myDoc = NULL;
14597 }
14598 xmlFreeParserCtxt(ctxt);
14599
14600 return ret;
14601 }
14602 #endif /* LIBXML_SAX1_ENABLED */
14603
14604 /************************************************************************
14605 * *
14606 * Front ends when parsing from memory *
14607 * *
14608 ************************************************************************/
14609
14610 /**
14611 * xmlCreateMemoryParserCtxt:
14612 * @buffer: a pointer to a char array
14613 * @size: the size of the array
14614 *
14615 * Create a parser context for an XML in-memory document.
14616 *
14617 * Returns the new parser context or NULL
14618 */
14619 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14620 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14621 xmlParserCtxtPtr ctxt;
14622 xmlParserInputPtr input;
14623 xmlParserInputBufferPtr buf;
14624
14625 if (buffer == NULL)
14626 return(NULL);
14627 if (size <= 0)
14628 return(NULL);
14629
14630 ctxt = xmlNewParserCtxt();
14631 if (ctxt == NULL)
14632 return(NULL);
14633
14634 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14635 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14636 if (buf == NULL) {
14637 xmlFreeParserCtxt(ctxt);
14638 return(NULL);
14639 }
14640
14641 input = xmlNewInputStream(ctxt);
14642 if (input == NULL) {
14643 xmlFreeParserInputBuffer(buf);
14644 xmlFreeParserCtxt(ctxt);
14645 return(NULL);
14646 }
14647
14648 input->filename = NULL;
14649 input->buf = buf;
14650 xmlBufResetInput(input->buf->buffer, input);
14651
14652 inputPush(ctxt, input);
14653 return(ctxt);
14654 }
14655
14656 #ifdef LIBXML_SAX1_ENABLED
14657 /**
14658 * xmlSAXParseMemoryWithData:
14659 * @sax: the SAX handler block
14660 * @buffer: an pointer to a char array
14661 * @size: the size of the array
14662 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14663 * documents
14664 * @data: the userdata
14665 *
14666 * parse an XML in-memory block and use the given SAX function block
14667 * to handle the parsing callback. If sax is NULL, fallback to the default
14668 * DOM tree building routines.
14669 *
14670 * User data (void *) is stored within the parser context in the
14671 * context's _private member, so it is available nearly everywhere in libxml
14672 *
14673 * Returns the resulting document tree
14674 */
14675
14676 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14677 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14678 int size, int recovery, void *data) {
14679 xmlDocPtr ret;
14680 xmlParserCtxtPtr ctxt;
14681
14682 xmlInitParser();
14683
14684 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14685 if (ctxt == NULL) return(NULL);
14686 if (sax != NULL) {
14687 if (ctxt->sax != NULL)
14688 xmlFree(ctxt->sax);
14689 ctxt->sax = sax;
14690 }
14691 xmlDetectSAX2(ctxt);
14692 if (data!=NULL) {
14693 ctxt->_private=data;
14694 }
14695
14696 ctxt->recovery = recovery;
14697
14698 xmlParseDocument(ctxt);
14699
14700 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14701 else {
14702 ret = NULL;
14703 xmlFreeDoc(ctxt->myDoc);
14704 ctxt->myDoc = NULL;
14705 }
14706 if (sax != NULL)
14707 ctxt->sax = NULL;
14708 xmlFreeParserCtxt(ctxt);
14709
14710 return(ret);
14711 }
14712
14713 /**
14714 * xmlSAXParseMemory:
14715 * @sax: the SAX handler block
14716 * @buffer: an pointer to a char array
14717 * @size: the size of the array
14718 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14719 * documents
14720 *
14721 * parse an XML in-memory block and use the given SAX function block
14722 * to handle the parsing callback. If sax is NULL, fallback to the default
14723 * DOM tree building routines.
14724 *
14725 * Returns the resulting document tree
14726 */
14727 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14728 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14729 int size, int recovery) {
14730 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14731 }
14732
14733 /**
14734 * xmlParseMemory:
14735 * @buffer: an pointer to a char array
14736 * @size: the size of the array
14737 *
14738 * parse an XML in-memory block and build a tree.
14739 *
14740 * Returns the resulting document tree
14741 */
14742
xmlParseMemory(const char * buffer,int size)14743 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14744 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14745 }
14746
14747 /**
14748 * xmlRecoverMemory:
14749 * @buffer: an pointer to a char array
14750 * @size: the size of the array
14751 *
14752 * parse an XML in-memory block and build a tree.
14753 * In the case the document is not Well Formed, an attempt to
14754 * build a tree is tried anyway
14755 *
14756 * Returns the resulting document tree or NULL in case of error
14757 */
14758
xmlRecoverMemory(const char * buffer,int size)14759 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14760 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14761 }
14762
14763 /**
14764 * xmlSAXUserParseMemory:
14765 * @sax: a SAX handler
14766 * @user_data: The user data returned on SAX callbacks
14767 * @buffer: an in-memory XML document input
14768 * @size: the length of the XML document in bytes
14769 *
14770 * A better SAX parsing routine.
14771 * parse an XML in-memory buffer and call the given SAX handler routines.
14772 *
14773 * Returns 0 in case of success or a error number otherwise
14774 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14775 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14776 const char *buffer, int size) {
14777 int ret = 0;
14778 xmlParserCtxtPtr ctxt;
14779
14780 xmlInitParser();
14781
14782 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14783 if (ctxt == NULL) return -1;
14784 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14785 xmlFree(ctxt->sax);
14786 ctxt->sax = sax;
14787 xmlDetectSAX2(ctxt);
14788
14789 if (user_data != NULL)
14790 ctxt->userData = user_data;
14791
14792 xmlParseDocument(ctxt);
14793
14794 if (ctxt->wellFormed)
14795 ret = 0;
14796 else {
14797 if (ctxt->errNo != 0)
14798 ret = ctxt->errNo;
14799 else
14800 ret = -1;
14801 }
14802 if (sax != NULL)
14803 ctxt->sax = NULL;
14804 if (ctxt->myDoc != NULL) {
14805 xmlFreeDoc(ctxt->myDoc);
14806 ctxt->myDoc = NULL;
14807 }
14808 xmlFreeParserCtxt(ctxt);
14809
14810 return ret;
14811 }
14812 #endif /* LIBXML_SAX1_ENABLED */
14813
14814 /**
14815 * xmlCreateDocParserCtxt:
14816 * @cur: a pointer to an array of xmlChar
14817 *
14818 * Creates a parser context for an XML in-memory document.
14819 *
14820 * Returns the new parser context or NULL
14821 */
14822 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14823 xmlCreateDocParserCtxt(const xmlChar *cur) {
14824 int len;
14825
14826 if (cur == NULL)
14827 return(NULL);
14828 len = xmlStrlen(cur);
14829 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14830 }
14831
14832 #ifdef LIBXML_SAX1_ENABLED
14833 /**
14834 * xmlSAXParseDoc:
14835 * @sax: the SAX handler block
14836 * @cur: a pointer to an array of xmlChar
14837 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14838 * documents
14839 *
14840 * parse an XML in-memory document and build a tree.
14841 * It use the given SAX function block to handle the parsing callback.
14842 * If sax is NULL, fallback to the default DOM tree building routines.
14843 *
14844 * Returns the resulting document tree
14845 */
14846
14847 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14848 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14849 xmlDocPtr ret;
14850 xmlParserCtxtPtr ctxt;
14851 xmlSAXHandlerPtr oldsax = NULL;
14852
14853 if (cur == NULL) return(NULL);
14854
14855
14856 ctxt = xmlCreateDocParserCtxt(cur);
14857 if (ctxt == NULL) return(NULL);
14858 if (sax != NULL) {
14859 oldsax = ctxt->sax;
14860 ctxt->sax = sax;
14861 ctxt->userData = NULL;
14862 }
14863 xmlDetectSAX2(ctxt);
14864
14865 xmlParseDocument(ctxt);
14866 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14867 else {
14868 ret = NULL;
14869 xmlFreeDoc(ctxt->myDoc);
14870 ctxt->myDoc = NULL;
14871 }
14872 if (sax != NULL)
14873 ctxt->sax = oldsax;
14874 xmlFreeParserCtxt(ctxt);
14875
14876 return(ret);
14877 }
14878
14879 /**
14880 * xmlParseDoc:
14881 * @cur: a pointer to an array of xmlChar
14882 *
14883 * parse an XML in-memory document and build a tree.
14884 *
14885 * Returns the resulting document tree
14886 */
14887
14888 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14889 xmlParseDoc(const xmlChar *cur) {
14890 return(xmlSAXParseDoc(NULL, cur, 0));
14891 }
14892 #endif /* LIBXML_SAX1_ENABLED */
14893
14894 #ifdef LIBXML_LEGACY_ENABLED
14895 /************************************************************************
14896 * *
14897 * Specific function to keep track of entities references *
14898 * and used by the XSLT debugger *
14899 * *
14900 ************************************************************************/
14901
14902 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14903
14904 /**
14905 * xmlAddEntityReference:
14906 * @ent : A valid entity
14907 * @firstNode : A valid first node for children of entity
14908 * @lastNode : A valid last node of children entity
14909 *
14910 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14911 */
14912 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14913 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14914 xmlNodePtr lastNode)
14915 {
14916 if (xmlEntityRefFunc != NULL) {
14917 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14918 }
14919 }
14920
14921
14922 /**
14923 * xmlSetEntityReferenceFunc:
14924 * @func: A valid function
14925 *
14926 * Set the function to call call back when a xml reference has been made
14927 */
14928 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14929 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14930 {
14931 xmlEntityRefFunc = func;
14932 }
14933 #endif /* LIBXML_LEGACY_ENABLED */
14934
14935 /************************************************************************
14936 * *
14937 * Miscellaneous *
14938 * *
14939 ************************************************************************/
14940
14941 #ifdef LIBXML_XPATH_ENABLED
14942 #include <libxml/xpath.h>
14943 #endif
14944
/* Default generic error routine; xmlInitParser() compares the currently
 * installed handler against it. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Non-zero once xmlInitParser() has run; set back to 0 by
 * xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Returns immediately when the library is already initialized. When
 * thread support is compiled in, the flag is checked a second time with
 * the global init mutex held before the initialization is done.
 */

void
xmlInitParser(void) {
    /* Fast path: nothing to do once initialized. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Check again now that the mutex is held. */
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /* Install the default generic error handler unless a different,
         * non-NULL one is already set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Only flagged as initialized once every step above has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14991
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * Nothing is done if the library is not currently initialized; on
 * completion the library is marked as not initialized again.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() did not run. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    /* NOTE(review): xmlCleanupMemory() still follows the call flagged as
     * "must be last" - confirm the intended ordering. */
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() to initialize the library again. */
    xmlParserInitialized = 0;
}
15038
15039 /************************************************************************
15040 * *
15041 * New set (2.6.0) of simpler and more flexible APIs *
15042 * *
15043 ************************************************************************/
15044
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always released with xmlFree().
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as a
 * single statement: it can be followed by a semicolon and used as the
 * unbraced body of an if/else without capturing the else branch.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
15056
15057 /**
15058 * xmlCtxtReset:
15059 * @ctxt: an XML parser context
15060 *
15061 * Reset a parser context
15062 */
15063 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)15064 xmlCtxtReset(xmlParserCtxtPtr ctxt)
15065 {
15066 xmlParserInputPtr input;
15067 xmlDictPtr dict;
15068
15069 if (ctxt == NULL)
15070 return;
15071
15072 dict = ctxt->dict;
15073
15074 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15075 xmlFreeInputStream(input);
15076 }
15077 ctxt->inputNr = 0;
15078 ctxt->input = NULL;
15079
15080 ctxt->spaceNr = 0;
15081 if (ctxt->spaceTab != NULL) {
15082 ctxt->spaceTab[0] = -1;
15083 ctxt->space = &ctxt->spaceTab[0];
15084 } else {
15085 ctxt->space = NULL;
15086 }
15087
15088
15089 ctxt->nodeNr = 0;
15090 ctxt->node = NULL;
15091
15092 ctxt->nameNr = 0;
15093 ctxt->name = NULL;
15094
15095 DICT_FREE(ctxt->version);
15096 ctxt->version = NULL;
15097 DICT_FREE(ctxt->encoding);
15098 ctxt->encoding = NULL;
15099 DICT_FREE(ctxt->directory);
15100 ctxt->directory = NULL;
15101 DICT_FREE(ctxt->extSubURI);
15102 ctxt->extSubURI = NULL;
15103 DICT_FREE(ctxt->extSubSystem);
15104 ctxt->extSubSystem = NULL;
15105 if (ctxt->myDoc != NULL)
15106 xmlFreeDoc(ctxt->myDoc);
15107 ctxt->myDoc = NULL;
15108
15109 ctxt->standalone = -1;
15110 ctxt->hasExternalSubset = 0;
15111 ctxt->hasPErefs = 0;
15112 ctxt->html = 0;
15113 ctxt->external = 0;
15114 ctxt->instate = XML_PARSER_START;
15115 ctxt->token = 0;
15116
15117 ctxt->wellFormed = 1;
15118 ctxt->nsWellFormed = 1;
15119 ctxt->disableSAX = 0;
15120 ctxt->valid = 1;
15121 #if 0
15122 ctxt->vctxt.userData = ctxt;
15123 ctxt->vctxt.error = xmlParserValidityError;
15124 ctxt->vctxt.warning = xmlParserValidityWarning;
15125 #endif
15126 ctxt->record_info = 0;
15127 ctxt->nbChars = 0;
15128 ctxt->checkIndex = 0;
15129 ctxt->inSubset = 0;
15130 ctxt->errNo = XML_ERR_OK;
15131 ctxt->depth = 0;
15132 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15133 ctxt->catalogs = NULL;
15134 ctxt->nbentities = 0;
15135 ctxt->sizeentities = 0;
15136 ctxt->sizeentcopy = 0;
15137 xmlInitNodeInfoSeq(&ctxt->node_seq);
15138
15139 if (ctxt->attsDefault != NULL) {
15140 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15141 ctxt->attsDefault = NULL;
15142 }
15143 if (ctxt->attsSpecial != NULL) {
15144 xmlHashFree(ctxt->attsSpecial, NULL);
15145 ctxt->attsSpecial = NULL;
15146 }
15147
15148 #ifdef LIBXML_CATALOG_ENABLED
15149 if (ctxt->catalogs != NULL)
15150 xmlCatalogFreeLocal(ctxt->catalogs);
15151 #endif
15152 if (ctxt->lastError.code != XML_ERR_OK)
15153 xmlResetError(&ctxt->lastError);
15154 }
15155
15156 /**
15157 * xmlCtxtResetPush:
15158 * @ctxt: an XML parser context
15159 * @chunk: a pointer to an array of chars
15160 * @size: number of chars in the array
15161 * @filename: an optional file name or URI
15162 * @encoding: the document encoding, or NULL
15163 *
15164 * Reset a push parser context
15165 *
15166 * Returns 0 in case of success and 1 in case of error
15167 */
15168 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)15169 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15170 int size, const char *filename, const char *encoding)
15171 {
15172 xmlParserInputPtr inputStream;
15173 xmlParserInputBufferPtr buf;
15174 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15175
15176 if (ctxt == NULL)
15177 return(1);
15178
15179 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15180 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15181
15182 buf = xmlAllocParserInputBuffer(enc);
15183 if (buf == NULL)
15184 return(1);
15185
15186 if (ctxt == NULL) {
15187 xmlFreeParserInputBuffer(buf);
15188 return(1);
15189 }
15190
15191 xmlCtxtReset(ctxt);
15192
15193 if (ctxt->pushTab == NULL) {
15194 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15195 sizeof(xmlChar *));
15196 if (ctxt->pushTab == NULL) {
15197 xmlErrMemory(ctxt, NULL);
15198 xmlFreeParserInputBuffer(buf);
15199 return(1);
15200 }
15201 }
15202
15203 if (filename == NULL) {
15204 ctxt->directory = NULL;
15205 } else {
15206 ctxt->directory = xmlParserGetDirectory(filename);
15207 }
15208
15209 inputStream = xmlNewInputStream(ctxt);
15210 if (inputStream == NULL) {
15211 xmlFreeParserInputBuffer(buf);
15212 return(1);
15213 }
15214
15215 if (filename == NULL)
15216 inputStream->filename = NULL;
15217 else
15218 inputStream->filename = (char *)
15219 xmlCanonicPath((const xmlChar *) filename);
15220 inputStream->buf = buf;
15221 xmlBufResetInput(buf->buffer, inputStream);
15222
15223 inputPush(ctxt, inputStream);
15224
15225 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15226 (ctxt->input->buf != NULL)) {
15227 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15228 size_t cur = ctxt->input->cur - ctxt->input->base;
15229
15230 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15231
15232 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15233 #ifdef DEBUG_PUSH
15234 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15235 #endif
15236 }
15237
15238 if (encoding != NULL) {
15239 xmlCharEncodingHandlerPtr hdlr;
15240
15241 if (ctxt->encoding != NULL)
15242 xmlFree((xmlChar *) ctxt->encoding);
15243 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15244
15245 hdlr = xmlFindCharEncodingHandler(encoding);
15246 if (hdlr != NULL) {
15247 xmlSwitchToEncoding(ctxt, hdlr);
15248 } else {
15249 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15250 "Unsupported encoding %s\n", BAD_CAST encoding);
15251 }
15252 } else if (enc != XML_CHAR_ENCODING_NONE) {
15253 xmlSwitchEncoding(ctxt, enc);
15254 }
15255
15256 return(0);
15257 }
15258
15259
15260 /**
15261 * xmlCtxtUseOptionsInternal:
15262 * @ctxt: an XML parser context
15263 * @options: a combination of xmlParserOption
15264 * @encoding: the user provided encoding to use
15265 *
15266 * Applies the options to the parser context
15267 *
15268 * Returns 0 in case of success, the set of unknown or unimplemented options
15269 * in case of error.
15270 */
15271 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15272 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15273 {
15274 if (ctxt == NULL)
15275 return(-1);
15276 if (encoding != NULL) {
15277 if (ctxt->encoding != NULL)
15278 xmlFree((xmlChar *) ctxt->encoding);
15279 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15280 }
15281 if (options & XML_PARSE_RECOVER) {
15282 ctxt->recovery = 1;
15283 options -= XML_PARSE_RECOVER;
15284 ctxt->options |= XML_PARSE_RECOVER;
15285 } else
15286 ctxt->recovery = 0;
15287 if (options & XML_PARSE_DTDLOAD) {
15288 ctxt->loadsubset = XML_DETECT_IDS;
15289 options -= XML_PARSE_DTDLOAD;
15290 ctxt->options |= XML_PARSE_DTDLOAD;
15291 } else
15292 ctxt->loadsubset = 0;
15293 if (options & XML_PARSE_DTDATTR) {
15294 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15295 options -= XML_PARSE_DTDATTR;
15296 ctxt->options |= XML_PARSE_DTDATTR;
15297 }
15298 if (options & XML_PARSE_NOENT) {
15299 ctxt->replaceEntities = 1;
15300 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15301 options -= XML_PARSE_NOENT;
15302 ctxt->options |= XML_PARSE_NOENT;
15303 } else
15304 ctxt->replaceEntities = 0;
15305 if (options & XML_PARSE_PEDANTIC) {
15306 ctxt->pedantic = 1;
15307 options -= XML_PARSE_PEDANTIC;
15308 ctxt->options |= XML_PARSE_PEDANTIC;
15309 } else
15310 ctxt->pedantic = 0;
15311 if (options & XML_PARSE_NOBLANKS) {
15312 ctxt->keepBlanks = 0;
15313 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15314 options -= XML_PARSE_NOBLANKS;
15315 ctxt->options |= XML_PARSE_NOBLANKS;
15316 } else
15317 ctxt->keepBlanks = 1;
15318 if (options & XML_PARSE_DTDVALID) {
15319 ctxt->validate = 1;
15320 if (options & XML_PARSE_NOWARNING)
15321 ctxt->vctxt.warning = NULL;
15322 if (options & XML_PARSE_NOERROR)
15323 ctxt->vctxt.error = NULL;
15324 options -= XML_PARSE_DTDVALID;
15325 ctxt->options |= XML_PARSE_DTDVALID;
15326 } else
15327 ctxt->validate = 0;
15328 if (options & XML_PARSE_NOWARNING) {
15329 ctxt->sax->warning = NULL;
15330 options -= XML_PARSE_NOWARNING;
15331 }
15332 if (options & XML_PARSE_NOERROR) {
15333 ctxt->sax->error = NULL;
15334 ctxt->sax->fatalError = NULL;
15335 options -= XML_PARSE_NOERROR;
15336 }
15337 #ifdef LIBXML_SAX1_ENABLED
15338 if (options & XML_PARSE_SAX1) {
15339 ctxt->sax->startElement = xmlSAX2StartElement;
15340 ctxt->sax->endElement = xmlSAX2EndElement;
15341 ctxt->sax->startElementNs = NULL;
15342 ctxt->sax->endElementNs = NULL;
15343 ctxt->sax->initialized = 1;
15344 options -= XML_PARSE_SAX1;
15345 ctxt->options |= XML_PARSE_SAX1;
15346 }
15347 #endif /* LIBXML_SAX1_ENABLED */
15348 if (options & XML_PARSE_NODICT) {
15349 ctxt->dictNames = 0;
15350 options -= XML_PARSE_NODICT;
15351 ctxt->options |= XML_PARSE_NODICT;
15352 } else {
15353 ctxt->dictNames = 1;
15354 }
15355 if (options & XML_PARSE_NOCDATA) {
15356 ctxt->sax->cdataBlock = NULL;
15357 options -= XML_PARSE_NOCDATA;
15358 ctxt->options |= XML_PARSE_NOCDATA;
15359 }
15360 if (options & XML_PARSE_NSCLEAN) {
15361 ctxt->options |= XML_PARSE_NSCLEAN;
15362 options -= XML_PARSE_NSCLEAN;
15363 }
15364 if (options & XML_PARSE_NONET) {
15365 ctxt->options |= XML_PARSE_NONET;
15366 options -= XML_PARSE_NONET;
15367 }
15368 if (options & XML_PARSE_COMPACT) {
15369 ctxt->options |= XML_PARSE_COMPACT;
15370 options -= XML_PARSE_COMPACT;
15371 }
15372 if (options & XML_PARSE_OLD10) {
15373 ctxt->options |= XML_PARSE_OLD10;
15374 options -= XML_PARSE_OLD10;
15375 }
15376 if (options & XML_PARSE_NOBASEFIX) {
15377 ctxt->options |= XML_PARSE_NOBASEFIX;
15378 options -= XML_PARSE_NOBASEFIX;
15379 }
15380 if (options & XML_PARSE_HUGE) {
15381 ctxt->options |= XML_PARSE_HUGE;
15382 options -= XML_PARSE_HUGE;
15383 if (ctxt->dict != NULL)
15384 xmlDictSetLimit(ctxt->dict, 0);
15385 }
15386 if (options & XML_PARSE_OLDSAX) {
15387 ctxt->options |= XML_PARSE_OLDSAX;
15388 options -= XML_PARSE_OLDSAX;
15389 }
15390 if (options & XML_PARSE_IGNORE_ENC) {
15391 ctxt->options |= XML_PARSE_IGNORE_ENC;
15392 options -= XML_PARSE_IGNORE_ENC;
15393 }
15394 if (options & XML_PARSE_BIG_LINES) {
15395 ctxt->options |= XML_PARSE_BIG_LINES;
15396 options -= XML_PARSE_BIG_LINES;
15397 }
15398 ctxt->linenumbers = 1;
15399 return (options);
15400 }
15401
15402 /**
15403 * xmlCtxtUseOptions:
15404 * @ctxt: an XML parser context
15405 * @options: a combination of xmlParserOption
15406 *
15407 * Applies the options to the parser context
15408 *
15409 * Returns 0 in case of success, the set of unknown or unimplemented options
15410 * in case of error.
15411 */
15412 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15413 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15414 {
15415 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15416 }
15417
15418 /**
15419 * xmlDoRead:
15420 * @ctxt: an XML parser context
15421 * @URL: the base URL to use for the document
15422 * @encoding: the document encoding, or NULL
15423 * @options: a combination of xmlParserOption
15424 * @reuse: keep the context for reuse
15425 *
15426 * Common front-end for the xmlRead functions
15427 *
15428 * Returns the resulting document tree or NULL
15429 */
15430 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15431 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15432 int options, int reuse)
15433 {
15434 xmlDocPtr ret;
15435
15436 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15437 if (encoding != NULL) {
15438 xmlCharEncodingHandlerPtr hdlr;
15439
15440 hdlr = xmlFindCharEncodingHandler(encoding);
15441 if (hdlr != NULL)
15442 xmlSwitchToEncoding(ctxt, hdlr);
15443 }
15444 if ((URL != NULL) && (ctxt->input != NULL) &&
15445 (ctxt->input->filename == NULL))
15446 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15447 xmlParseDocument(ctxt);
15448 if ((ctxt->wellFormed) || ctxt->recovery)
15449 ret = ctxt->myDoc;
15450 else {
15451 ret = NULL;
15452 if (ctxt->myDoc != NULL) {
15453 xmlFreeDoc(ctxt->myDoc);
15454 }
15455 }
15456 ctxt->myDoc = NULL;
15457 if (!reuse) {
15458 xmlFreeParserCtxt(ctxt);
15459 }
15460
15461 return (ret);
15462 }
15463
15464 /**
15465 * xmlReadDoc:
15466 * @cur: a pointer to a zero terminated string
15467 * @URL: the base URL to use for the document
15468 * @encoding: the document encoding, or NULL
15469 * @options: a combination of xmlParserOption
15470 *
15471 * parse an XML in-memory document and build a tree.
15472 *
15473 * Returns the resulting document tree
15474 */
15475 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15476 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15477 {
15478 xmlParserCtxtPtr ctxt;
15479
15480 if (cur == NULL)
15481 return (NULL);
15482 xmlInitParser();
15483
15484 ctxt = xmlCreateDocParserCtxt(cur);
15485 if (ctxt == NULL)
15486 return (NULL);
15487 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15488 }
15489
15490 /**
15491 * xmlReadFile:
15492 * @filename: a file or URL
15493 * @encoding: the document encoding, or NULL
15494 * @options: a combination of xmlParserOption
15495 *
15496 * parse an XML file from the filesystem or the network.
15497 *
15498 * Returns the resulting document tree
15499 */
15500 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15501 xmlReadFile(const char *filename, const char *encoding, int options)
15502 {
15503 xmlParserCtxtPtr ctxt;
15504
15505 xmlInitParser();
15506 ctxt = xmlCreateURLParserCtxt(filename, options);
15507 if (ctxt == NULL)
15508 return (NULL);
15509 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15510 }
15511
15512 /**
15513 * xmlReadMemory:
15514 * @buffer: a pointer to a char array
15515 * @size: the size of the array
15516 * @URL: the base URL to use for the document
15517 * @encoding: the document encoding, or NULL
15518 * @options: a combination of xmlParserOption
15519 *
15520 * parse an XML in-memory document and build a tree.
15521 *
15522 * Returns the resulting document tree
15523 */
15524 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15525 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15526 {
15527 xmlParserCtxtPtr ctxt;
15528
15529 xmlInitParser();
15530 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15531 if (ctxt == NULL)
15532 return (NULL);
15533 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15534 }
15535
15536 /**
15537 * xmlReadFd:
15538 * @fd: an open file descriptor
15539 * @URL: the base URL to use for the document
15540 * @encoding: the document encoding, or NULL
15541 * @options: a combination of xmlParserOption
15542 *
15543 * parse an XML from a file descriptor and build a tree.
15544 * NOTE that the file descriptor will not be closed when the
15545 * reader is closed or reset.
15546 *
15547 * Returns the resulting document tree
15548 */
15549 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15550 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15551 {
15552 xmlParserCtxtPtr ctxt;
15553 xmlParserInputBufferPtr input;
15554 xmlParserInputPtr stream;
15555
15556 if (fd < 0)
15557 return (NULL);
15558 xmlInitParser();
15559
15560 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15561 if (input == NULL)
15562 return (NULL);
15563 input->closecallback = NULL;
15564 ctxt = xmlNewParserCtxt();
15565 if (ctxt == NULL) {
15566 xmlFreeParserInputBuffer(input);
15567 return (NULL);
15568 }
15569 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15570 if (stream == NULL) {
15571 xmlFreeParserInputBuffer(input);
15572 xmlFreeParserCtxt(ctxt);
15573 return (NULL);
15574 }
15575 inputPush(ctxt, stream);
15576 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15577 }
15578
15579 /**
15580 * xmlReadIO:
15581 * @ioread: an I/O read function
15582 * @ioclose: an I/O close function
15583 * @ioctx: an I/O handler
15584 * @URL: the base URL to use for the document
15585 * @encoding: the document encoding, or NULL
15586 * @options: a combination of xmlParserOption
15587 *
15588 * parse an XML document from I/O functions and source and build a tree.
15589 *
15590 * Returns the resulting document tree
15591 */
15592 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15593 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15594 void *ioctx, const char *URL, const char *encoding, int options)
15595 {
15596 xmlParserCtxtPtr ctxt;
15597 xmlParserInputBufferPtr input;
15598 xmlParserInputPtr stream;
15599
15600 if (ioread == NULL)
15601 return (NULL);
15602 xmlInitParser();
15603
15604 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15605 XML_CHAR_ENCODING_NONE);
15606 if (input == NULL) {
15607 if (ioclose != NULL)
15608 ioclose(ioctx);
15609 return (NULL);
15610 }
15611 ctxt = xmlNewParserCtxt();
15612 if (ctxt == NULL) {
15613 xmlFreeParserInputBuffer(input);
15614 return (NULL);
15615 }
15616 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15617 if (stream == NULL) {
15618 xmlFreeParserInputBuffer(input);
15619 xmlFreeParserCtxt(ctxt);
15620 return (NULL);
15621 }
15622 inputPush(ctxt, stream);
15623 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15624 }
15625
15626 /**
15627 * xmlCtxtReadDoc:
15628 * @ctxt: an XML parser context
15629 * @cur: a pointer to a zero terminated string
15630 * @URL: the base URL to use for the document
15631 * @encoding: the document encoding, or NULL
15632 * @options: a combination of xmlParserOption
15633 *
15634 * parse an XML in-memory document and build a tree.
15635 * This reuses the existing @ctxt parser context
15636 *
15637 * Returns the resulting document tree
15638 */
15639 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15640 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15641 const char *URL, const char *encoding, int options)
15642 {
15643 xmlParserInputPtr stream;
15644
15645 if (cur == NULL)
15646 return (NULL);
15647 if (ctxt == NULL)
15648 return (NULL);
15649 xmlInitParser();
15650
15651 xmlCtxtReset(ctxt);
15652
15653 stream = xmlNewStringInputStream(ctxt, cur);
15654 if (stream == NULL) {
15655 return (NULL);
15656 }
15657 inputPush(ctxt, stream);
15658 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15659 }
15660
15661 /**
15662 * xmlCtxtReadFile:
15663 * @ctxt: an XML parser context
15664 * @filename: a file or URL
15665 * @encoding: the document encoding, or NULL
15666 * @options: a combination of xmlParserOption
15667 *
15668 * parse an XML file from the filesystem or the network.
15669 * This reuses the existing @ctxt parser context
15670 *
15671 * Returns the resulting document tree
15672 */
15673 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15674 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15675 const char *encoding, int options)
15676 {
15677 xmlParserInputPtr stream;
15678
15679 if (filename == NULL)
15680 return (NULL);
15681 if (ctxt == NULL)
15682 return (NULL);
15683 xmlInitParser();
15684
15685 xmlCtxtReset(ctxt);
15686
15687 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15688 if (stream == NULL) {
15689 return (NULL);
15690 }
15691 inputPush(ctxt, stream);
15692 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15693 }
15694
15695 /**
15696 * xmlCtxtReadMemory:
15697 * @ctxt: an XML parser context
15698 * @buffer: a pointer to a char array
15699 * @size: the size of the array
15700 * @URL: the base URL to use for the document
15701 * @encoding: the document encoding, or NULL
15702 * @options: a combination of xmlParserOption
15703 *
15704 * parse an XML in-memory document and build a tree.
15705 * This reuses the existing @ctxt parser context
15706 *
15707 * Returns the resulting document tree
15708 */
15709 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15710 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15711 const char *URL, const char *encoding, int options)
15712 {
15713 xmlParserInputBufferPtr input;
15714 xmlParserInputPtr stream;
15715
15716 if (ctxt == NULL)
15717 return (NULL);
15718 if (buffer == NULL)
15719 return (NULL);
15720 xmlInitParser();
15721
15722 xmlCtxtReset(ctxt);
15723
15724 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15725 if (input == NULL) {
15726 return(NULL);
15727 }
15728
15729 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15730 if (stream == NULL) {
15731 xmlFreeParserInputBuffer(input);
15732 return(NULL);
15733 }
15734
15735 inputPush(ctxt, stream);
15736 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15737 }
15738
15739 /**
15740 * xmlCtxtReadFd:
15741 * @ctxt: an XML parser context
15742 * @fd: an open file descriptor
15743 * @URL: the base URL to use for the document
15744 * @encoding: the document encoding, or NULL
15745 * @options: a combination of xmlParserOption
15746 *
15747 * parse an XML from a file descriptor and build a tree.
15748 * This reuses the existing @ctxt parser context
15749 * NOTE that the file descriptor will not be closed when the
15750 * reader is closed or reset.
15751 *
15752 * Returns the resulting document tree
15753 */
15754 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15755 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15756 const char *URL, const char *encoding, int options)
15757 {
15758 xmlParserInputBufferPtr input;
15759 xmlParserInputPtr stream;
15760
15761 if (fd < 0)
15762 return (NULL);
15763 if (ctxt == NULL)
15764 return (NULL);
15765 xmlInitParser();
15766
15767 xmlCtxtReset(ctxt);
15768
15769
15770 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15771 if (input == NULL)
15772 return (NULL);
15773 input->closecallback = NULL;
15774 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15775 if (stream == NULL) {
15776 xmlFreeParserInputBuffer(input);
15777 return (NULL);
15778 }
15779 inputPush(ctxt, stream);
15780 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15781 }
15782
15783 /**
15784 * xmlCtxtReadIO:
15785 * @ctxt: an XML parser context
15786 * @ioread: an I/O read function
15787 * @ioclose: an I/O close function
15788 * @ioctx: an I/O handler
15789 * @URL: the base URL to use for the document
15790 * @encoding: the document encoding, or NULL
15791 * @options: a combination of xmlParserOption
15792 *
15793 * parse an XML document from I/O functions and source and build a tree.
15794 * This reuses the existing @ctxt parser context
15795 *
15796 * Returns the resulting document tree
15797 */
15798 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15799 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15800 xmlInputCloseCallback ioclose, void *ioctx,
15801 const char *URL,
15802 const char *encoding, int options)
15803 {
15804 xmlParserInputBufferPtr input;
15805 xmlParserInputPtr stream;
15806
15807 if (ioread == NULL)
15808 return (NULL);
15809 if (ctxt == NULL)
15810 return (NULL);
15811 xmlInitParser();
15812
15813 xmlCtxtReset(ctxt);
15814
15815 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15816 XML_CHAR_ENCODING_NONE);
15817 if (input == NULL) {
15818 if (ioclose != NULL)
15819 ioclose(ioctx);
15820 return (NULL);
15821 }
15822 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15823 if (stream == NULL) {
15824 xmlFreeParserInputBuffer(input);
15825 return (NULL);
15826 }
15827 inputPush(ctxt, stream);
15828 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15829 }
15830
15831 #define bottom_parser
15832 #include "elfgcchack.h"
15833