1 /*
2  * entities.c : implementation for the XML entities handling
3  *
4  * See Copyright for the status of this software.
5  *
6  * daniel@veillard.com
7  */
8 
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #include <string.h>
18 #ifdef HAVE_STDLIB_H
19 #include <stdlib.h>
20 #endif
21 #include <libxml/xmlmemory.h>
22 #include <libxml/hash.h>
23 #include <libxml/entities.h>
24 #include <libxml/parser.h>
25 #include <libxml/parserInternals.h>
26 #include <libxml/xmlerror.h>
27 #include <libxml/globals.h>
28 #include <libxml/dict.h>
29 
30 #include "save.h"
31 
32 /*
33  * The XML predefined entities.
34  */
35 
36 static xmlEntity xmlEntityLt = {
37     NULL, XML_ENTITY_DECL, BAD_CAST "lt",
38     NULL, NULL, NULL, NULL, NULL, NULL,
39     BAD_CAST "<", BAD_CAST "<", 1,
40     XML_INTERNAL_PREDEFINED_ENTITY,
41     NULL, NULL, NULL, NULL, 0, 1
42 };
43 static xmlEntity xmlEntityGt = {
44     NULL, XML_ENTITY_DECL, BAD_CAST "gt",
45     NULL, NULL, NULL, NULL, NULL, NULL,
46     BAD_CAST ">", BAD_CAST ">", 1,
47     XML_INTERNAL_PREDEFINED_ENTITY,
48     NULL, NULL, NULL, NULL, 0, 1
49 };
50 static xmlEntity xmlEntityAmp = {
51     NULL, XML_ENTITY_DECL, BAD_CAST "amp",
52     NULL, NULL, NULL, NULL, NULL, NULL,
53     BAD_CAST "&", BAD_CAST "&", 1,
54     XML_INTERNAL_PREDEFINED_ENTITY,
55     NULL, NULL, NULL, NULL, 0, 1
56 };
57 static xmlEntity xmlEntityQuot = {
58     NULL, XML_ENTITY_DECL, BAD_CAST "quot",
59     NULL, NULL, NULL, NULL, NULL, NULL,
60     BAD_CAST "\"", BAD_CAST "\"", 1,
61     XML_INTERNAL_PREDEFINED_ENTITY,
62     NULL, NULL, NULL, NULL, 0, 1
63 };
64 static xmlEntity xmlEntityApos = {
65     NULL, XML_ENTITY_DECL, BAD_CAST "apos",
66     NULL, NULL, NULL, NULL, NULL, NULL,
67     BAD_CAST "'", BAD_CAST "'", 1,
68     XML_INTERNAL_PREDEFINED_ENTITY,
69     NULL, NULL, NULL, NULL, 0, 1
70 };
71 
72 /**
73  * xmlEntitiesErrMemory:
74  * @extra:  extra information
75  *
76  * Handle an out of memory condition
77  */
78 static void
xmlEntitiesErrMemory(const char * extra)79 xmlEntitiesErrMemory(const char *extra)
80 {
81     __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
82 }
83 
84 /**
85  * xmlEntitiesErr:
86  * @code:  the error code
87  * @msg:  the message
88  *
89  * Handle an out of memory condition
90  */
91 static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesErr(xmlParserErrors code,const char * msg)92 xmlEntitiesErr(xmlParserErrors code, const char *msg)
93 {
94     __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
95 }
96 
97 /*
98  * xmlFreeEntity : clean-up an entity record.
99  */
100 static void
xmlFreeEntity(xmlEntityPtr entity)101 xmlFreeEntity(xmlEntityPtr entity)
102 {
103     xmlDictPtr dict = NULL;
104 
105     if (entity == NULL)
106         return;
107 
108     if (entity->doc != NULL)
109         dict = entity->doc->dict;
110 
111 
112     if ((entity->children) && (entity->owner == 1) &&
113         (entity == (xmlEntityPtr) entity->children->parent))
114         xmlFreeNodeList(entity->children);
115     if (dict != NULL) {
116         if ((entity->name != NULL) && (!xmlDictOwns(dict, entity->name)))
117             xmlFree((char *) entity->name);
118         if ((entity->ExternalID != NULL) &&
119 	    (!xmlDictOwns(dict, entity->ExternalID)))
120             xmlFree((char *) entity->ExternalID);
121         if ((entity->SystemID != NULL) &&
122 	    (!xmlDictOwns(dict, entity->SystemID)))
123             xmlFree((char *) entity->SystemID);
124         if ((entity->URI != NULL) && (!xmlDictOwns(dict, entity->URI)))
125             xmlFree((char *) entity->URI);
126         if ((entity->content != NULL)
127             && (!xmlDictOwns(dict, entity->content)))
128             xmlFree((char *) entity->content);
129         if ((entity->orig != NULL) && (!xmlDictOwns(dict, entity->orig)))
130             xmlFree((char *) entity->orig);
131     } else {
132         if (entity->name != NULL)
133             xmlFree((char *) entity->name);
134         if (entity->ExternalID != NULL)
135             xmlFree((char *) entity->ExternalID);
136         if (entity->SystemID != NULL)
137             xmlFree((char *) entity->SystemID);
138         if (entity->URI != NULL)
139             xmlFree((char *) entity->URI);
140         if (entity->content != NULL)
141             xmlFree((char *) entity->content);
142         if (entity->orig != NULL)
143             xmlFree((char *) entity->orig);
144     }
145     xmlFree(entity);
146 }
147 
148 /*
149  * xmlCreateEntity:
150  *
151  * internal routine doing the entity node structures allocations
152  */
153 static xmlEntityPtr
xmlCreateEntity(xmlDictPtr dict,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)154 xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
155 	        const xmlChar *ExternalID, const xmlChar *SystemID,
156 	        const xmlChar *content) {
157     xmlEntityPtr ret;
158 
159     ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
160     if (ret == NULL) {
161         xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
162 	return(NULL);
163     }
164     memset(ret, 0, sizeof(xmlEntity));
165     ret->type = XML_ENTITY_DECL;
166     ret->checked = 0;
167 
168     /*
169      * fill the structure.
170      */
171     ret->etype = (xmlEntityType) type;
172     if (dict == NULL) {
173 	ret->name = xmlStrdup(name);
174 	if (ExternalID != NULL)
175 	    ret->ExternalID = xmlStrdup(ExternalID);
176 	if (SystemID != NULL)
177 	    ret->SystemID = xmlStrdup(SystemID);
178     } else {
179         ret->name = xmlDictLookup(dict, name, -1);
180 	if (ExternalID != NULL)
181 	    ret->ExternalID = xmlDictLookup(dict, ExternalID, -1);
182 	if (SystemID != NULL)
183 	    ret->SystemID = xmlDictLookup(dict, SystemID, -1);
184     }
185     if (content != NULL) {
186         ret->length = xmlStrlen(content);
187 	if ((dict != NULL) && (ret->length < 5))
188 	    ret->content = (xmlChar *)
189 	                   xmlDictLookup(dict, content, ret->length);
190 	else
191 	    ret->content = xmlStrndup(content, ret->length);
192      } else {
193         ret->length = 0;
194         ret->content = NULL;
195     }
196     ret->URI = NULL; /* to be computed by the layer knowing
197 			the defining entity */
198     ret->orig = NULL;
199     ret->owner = 0;
200 
201     return(ret);
202 }
203 
204 /*
205  * xmlAddEntity : register a new entity for an entities table.
206  */
207 static xmlEntityPtr
xmlAddEntity(xmlDtdPtr dtd,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)208 xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
209 	  const xmlChar *ExternalID, const xmlChar *SystemID,
210 	  const xmlChar *content) {
211     xmlDictPtr dict = NULL;
212     xmlEntitiesTablePtr table = NULL;
213     xmlEntityPtr ret, predef;
214 
215     if (name == NULL)
216 	return(NULL);
217     if (dtd == NULL)
218 	return(NULL);
219     if (dtd->doc != NULL)
220         dict = dtd->doc->dict;
221 
222     switch (type) {
223         case XML_INTERNAL_GENERAL_ENTITY:
224         case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
225         case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
226             predef = xmlGetPredefinedEntity(name);
227             if (predef != NULL) {
228                 int valid = 0;
229 
230                 /* 4.6 Predefined Entities */
231                 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
232                     (content != NULL)) {
233                     int c = predef->content[0];
234 
235                     if (((content[0] == c) && (content[1] == 0)) &&
236                         ((c == '>') || (c == '\'') || (c == '"'))) {
237                         valid = 1;
238                     } else if ((content[0] == '&') && (content[1] == '#')) {
239                         if (content[2] == 'x') {
240                             xmlChar *hex = BAD_CAST "0123456789ABCDEF";
241                             xmlChar ref[] = "00;";
242 
243                             ref[0] = hex[c / 16 % 16];
244                             ref[1] = hex[c % 16];
245                             if (xmlStrcasecmp(&content[3], ref) == 0)
246                                 valid = 1;
247                         } else {
248                             xmlChar ref[] = "00;";
249 
250                             ref[0] = '0' + c / 10 % 10;
251                             ref[1] = '0' + c % 10;
252                             if (xmlStrEqual(&content[2], ref))
253                                 valid = 1;
254                         }
255                     }
256                 }
257                 if (!valid) {
258                     xmlEntitiesErr(XML_ERR_ENTITY_PROCESSING,
259                             "xmlAddEntity: invalid redeclaration of predefined"
260                             " entity");
261                     return(NULL);
262                 }
263             }
264 	    if (dtd->entities == NULL)
265 		dtd->entities = xmlHashCreateDict(0, dict);
266 	    table = dtd->entities;
267 	    break;
268         case XML_INTERNAL_PARAMETER_ENTITY:
269         case XML_EXTERNAL_PARAMETER_ENTITY:
270 	    if (dtd->pentities == NULL)
271 		dtd->pentities = xmlHashCreateDict(0, dict);
272 	    table = dtd->pentities;
273 	    break;
274         case XML_INTERNAL_PREDEFINED_ENTITY:
275 	    return(NULL);
276     }
277     if (table == NULL)
278 	return(NULL);
279     ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
280     if (ret == NULL)
281         return(NULL);
282     ret->doc = dtd->doc;
283 
284     if (xmlHashAddEntry(table, name, ret)) {
285 	/*
286 	 * entity was already defined at another level.
287 	 */
288         xmlFreeEntity(ret);
289 	return(NULL);
290     }
291     return(ret);
292 }
293 
294 /**
295  * xmlGetPredefinedEntity:
296  * @name:  the entity name
297  *
298  * Check whether this name is an predefined entity.
299  *
300  * Returns NULL if not, otherwise the entity
301  */
302 xmlEntityPtr
xmlGetPredefinedEntity(const xmlChar * name)303 xmlGetPredefinedEntity(const xmlChar *name) {
304     if (name == NULL) return(NULL);
305     switch (name[0]) {
306         case 'l':
307 	    if (xmlStrEqual(name, BAD_CAST "lt"))
308 	        return(&xmlEntityLt);
309 	    break;
310         case 'g':
311 	    if (xmlStrEqual(name, BAD_CAST "gt"))
312 	        return(&xmlEntityGt);
313 	    break;
314         case 'a':
315 	    if (xmlStrEqual(name, BAD_CAST "amp"))
316 	        return(&xmlEntityAmp);
317 	    if (xmlStrEqual(name, BAD_CAST "apos"))
318 	        return(&xmlEntityApos);
319 	    break;
320         case 'q':
321 	    if (xmlStrEqual(name, BAD_CAST "quot"))
322 	        return(&xmlEntityQuot);
323 	    break;
324 	default:
325 	    break;
326     }
327     return(NULL);
328 }
329 
330 /**
331  * xmlAddDtdEntity:
332  * @doc:  the document
333  * @name:  the entity name
334  * @type:  the entity type XML_xxx_yyy_ENTITY
335  * @ExternalID:  the entity external ID if available
336  * @SystemID:  the entity system ID if available
337  * @content:  the entity content
338  *
339  * Register a new entity for this document DTD external subset.
340  *
341  * Returns a pointer to the entity or NULL in case of error
342  */
343 xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)344 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
345 	        const xmlChar *ExternalID, const xmlChar *SystemID,
346 		const xmlChar *content) {
347     xmlEntityPtr ret;
348     xmlDtdPtr dtd;
349 
350     if (doc == NULL) {
351 	xmlEntitiesErr(XML_DTD_NO_DOC,
352 	        "xmlAddDtdEntity: document is NULL");
353 	return(NULL);
354     }
355     if (doc->extSubset == NULL) {
356 	xmlEntitiesErr(XML_DTD_NO_DTD,
357 	        "xmlAddDtdEntity: document without external subset");
358 	return(NULL);
359     }
360     dtd = doc->extSubset;
361     ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
362     if (ret == NULL) return(NULL);
363 
364     /*
365      * Link it to the DTD
366      */
367     ret->parent = dtd;
368     ret->doc = dtd->doc;
369     if (dtd->last == NULL) {
370 	dtd->children = dtd->last = (xmlNodePtr) ret;
371     } else {
372         dtd->last->next = (xmlNodePtr) ret;
373 	ret->prev = dtd->last;
374 	dtd->last = (xmlNodePtr) ret;
375     }
376     return(ret);
377 }
378 
379 /**
380  * xmlAddDocEntity:
381  * @doc:  the document
382  * @name:  the entity name
383  * @type:  the entity type XML_xxx_yyy_ENTITY
384  * @ExternalID:  the entity external ID if available
385  * @SystemID:  the entity system ID if available
386  * @content:  the entity content
387  *
388  * Register a new entity for this document.
389  *
390  * Returns a pointer to the entity or NULL in case of error
391  */
392 xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)393 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
394 	        const xmlChar *ExternalID, const xmlChar *SystemID,
395 	        const xmlChar *content) {
396     xmlEntityPtr ret;
397     xmlDtdPtr dtd;
398 
399     if (doc == NULL) {
400 	xmlEntitiesErr(XML_DTD_NO_DOC,
401 	        "xmlAddDocEntity: document is NULL");
402 	return(NULL);
403     }
404     if (doc->intSubset == NULL) {
405 	xmlEntitiesErr(XML_DTD_NO_DTD,
406 	        "xmlAddDocEntity: document without internal subset");
407 	return(NULL);
408     }
409     dtd = doc->intSubset;
410     ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
411     if (ret == NULL) return(NULL);
412 
413     /*
414      * Link it to the DTD
415      */
416     ret->parent = dtd;
417     ret->doc = dtd->doc;
418     if (dtd->last == NULL) {
419 	dtd->children = dtd->last = (xmlNodePtr) ret;
420     } else {
421 	dtd->last->next = (xmlNodePtr) ret;
422 	ret->prev = dtd->last;
423 	dtd->last = (xmlNodePtr) ret;
424     }
425     return(ret);
426 }
427 
428 /**
429  * xmlNewEntity:
430  * @doc:  the document
431  * @name:  the entity name
432  * @type:  the entity type XML_xxx_yyy_ENTITY
433  * @ExternalID:  the entity external ID if available
434  * @SystemID:  the entity system ID if available
435  * @content:  the entity content
436  *
437  * Create a new entity, this differs from xmlAddDocEntity() that if
438  * the document is NULL or has no internal subset defined, then an
439  * unlinked entity structure will be returned, it is then the responsibility
440  * of the caller to link it to the document later or free it when not needed
441  * anymore.
442  *
443  * Returns a pointer to the entity or NULL in case of error
444  */
445 xmlEntityPtr
xmlNewEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)446 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
447 	     const xmlChar *ExternalID, const xmlChar *SystemID,
448 	     const xmlChar *content) {
449     xmlEntityPtr ret;
450     xmlDictPtr dict;
451 
452     if ((doc != NULL) && (doc->intSubset != NULL)) {
453 	return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
454     }
455     if (doc != NULL)
456         dict = doc->dict;
457     else
458         dict = NULL;
459     ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
460     if (ret == NULL)
461         return(NULL);
462     ret->doc = doc;
463     return(ret);
464 }
465 
466 /**
467  * xmlGetEntityFromTable:
468  * @table:  an entity table
469  * @name:  the entity name
470  * @parameter:  look for parameter entities
471  *
472  * Do an entity lookup in the table.
473  * returns the corresponding parameter entity, if found.
474  *
475  * Returns A pointer to the entity structure or NULL if not found.
476  */
477 static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table,const xmlChar * name)478 xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
479     return((xmlEntityPtr) xmlHashLookup(table, name));
480 }
481 
482 /**
483  * xmlGetParameterEntity:
484  * @doc:  the document referencing the entity
485  * @name:  the entity name
486  *
487  * Do an entity lookup in the internal and external subsets and
488  * returns the corresponding parameter entity, if found.
489  *
490  * Returns A pointer to the entity structure or NULL if not found.
491  */
492 xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc,const xmlChar * name)493 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
494     xmlEntitiesTablePtr table;
495     xmlEntityPtr ret;
496 
497     if (doc == NULL)
498 	return(NULL);
499     if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
500 	table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
501 	ret = xmlGetEntityFromTable(table, name);
502 	if (ret != NULL)
503 	    return(ret);
504     }
505     if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
506 	table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
507 	return(xmlGetEntityFromTable(table, name));
508     }
509     return(NULL);
510 }
511 
512 /**
513  * xmlGetDtdEntity:
514  * @doc:  the document referencing the entity
515  * @name:  the entity name
516  *
517  * Do an entity lookup in the DTD entity hash table and
518  * returns the corresponding entity, if found.
519  * Note: the first argument is the document node, not the DTD node.
520  *
521  * Returns A pointer to the entity structure or NULL if not found.
522  */
523 xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc,const xmlChar * name)524 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
525     xmlEntitiesTablePtr table;
526 
527     if (doc == NULL)
528 	return(NULL);
529     if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
530 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
531 	return(xmlGetEntityFromTable(table, name));
532     }
533     return(NULL);
534 }
535 
536 /**
537  * xmlGetDocEntity:
538  * @doc:  the document referencing the entity
539  * @name:  the entity name
540  *
541  * Do an entity lookup in the document entity hash table and
542  * returns the corresponding entity, otherwise a lookup is done
543  * in the predefined entities too.
544  *
545  * Returns A pointer to the entity structure or NULL if not found.
546  */
547 xmlEntityPtr
xmlGetDocEntity(const xmlDoc * doc,const xmlChar * name)548 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
549     xmlEntityPtr cur;
550     xmlEntitiesTablePtr table;
551 
552     if (doc != NULL) {
553 	if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
554 	    table = (xmlEntitiesTablePtr) doc->intSubset->entities;
555 	    cur = xmlGetEntityFromTable(table, name);
556 	    if (cur != NULL)
557 		return(cur);
558 	}
559 	if (doc->standalone != 1) {
560 	    if ((doc->extSubset != NULL) &&
561 		(doc->extSubset->entities != NULL)) {
562 		table = (xmlEntitiesTablePtr) doc->extSubset->entities;
563 		cur = xmlGetEntityFromTable(table, name);
564 		if (cur != NULL)
565 		    return(cur);
566 	    }
567 	}
568     }
569     return(xmlGetPredefinedEntity(name));
570 }
571 
/*
 * Macro used to grow the current buffer.
 *
 * It doubles the allocation behind the `buffer` / `buffer_size` locals
 * of the function using it, and jumps to that function's `mem_error`
 * label when the doubled size wraps around or xmlRealloc() fails; in
 * both cases `buffer` still holds the previous allocation.
 *
 * Wrapped in do { } while (0) so that `growBufferReentrant();` is a
 * single statement and stays valid inside an unbraced if / else.
 */
#define growBufferReentrant() do {					\
    xmlChar *tmp;                                                       \
    size_t new_size = buffer_size * 2;                                  \
    if (new_size < buffer_size) goto mem_error;                         \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);	                \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;							\
    buffer_size = new_size;						\
} while (0)
584 
585 /**
586  * xmlEncodeEntitiesInternal:
587  * @doc:  the document containing the string
588  * @input:  A string to convert to XML.
589  * @attr: are we handling an attribute value
590  *
591  * Do a global encoding of a string, replacing the predefined entities
592  * and non ASCII values with their entities and CharRef counterparts.
593  * Contrary to xmlEncodeEntities, this routine is reentrant, and result
594  * must be deallocated.
595  *
596  * Returns A newly allocated string with the substitution done.
597  */
598 static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc,const xmlChar * input,int attr)599 xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
600     const xmlChar *cur = input;
601     xmlChar *buffer = NULL;
602     xmlChar *out = NULL;
603     size_t buffer_size = 0;
604     int html = 0;
605 
606     if (input == NULL) return(NULL);
607     if (doc != NULL)
608         html = (doc->type == XML_HTML_DOCUMENT_NODE);
609 
610     /*
611      * allocate an translation buffer.
612      */
613     buffer_size = 1000;
614     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
615     if (buffer == NULL) {
616         xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
617 	return(NULL);
618     }
619     out = buffer;
620 
621     while (*cur != '\0') {
622         size_t indx = out - buffer;
623         if (indx + 100 > buffer_size) {
624 
625 	    growBufferReentrant();
626 	    out = &buffer[indx];
627 	}
628 
629 	/*
630 	 * By default one have to encode at least '<', '>', '"' and '&' !
631 	 */
632 	if (*cur == '<') {
633 	    const xmlChar *end;
634 
635 	    /*
636 	     * Special handling of server side include in HTML attributes
637 	     */
638 	    if (html && attr &&
639 	        (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
640 	        ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
641 	        while (cur != end) {
642 		    *out++ = *cur++;
643 		    indx = out - buffer;
644 		    if (indx + 100 > buffer_size) {
645 			growBufferReentrant();
646 			out = &buffer[indx];
647 		    }
648 		}
649 		*out++ = *cur++;
650 		*out++ = *cur++;
651 		*out++ = *cur++;
652 		continue;
653 	    }
654 	    *out++ = '&';
655 	    *out++ = 'l';
656 	    *out++ = 't';
657 	    *out++ = ';';
658 	} else if (*cur == '>') {
659 	    *out++ = '&';
660 	    *out++ = 'g';
661 	    *out++ = 't';
662 	    *out++ = ';';
663 	} else if (*cur == '&') {
664 	    /*
665 	     * Special handling of &{...} construct from HTML 4, see
666 	     * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
667 	     */
668 	    if (html && attr && (cur[1] == '{') &&
669 	        (strchr((const char *) cur, '}'))) {
670 	        while (*cur != '}') {
671 		    *out++ = *cur++;
672 		    indx = out - buffer;
673 		    if (indx + 100 > buffer_size) {
674 			growBufferReentrant();
675 			out = &buffer[indx];
676 		    }
677 		}
678 		*out++ = *cur++;
679 		continue;
680 	    }
681 	    *out++ = '&';
682 	    *out++ = 'a';
683 	    *out++ = 'm';
684 	    *out++ = 'p';
685 	    *out++ = ';';
686 	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
687 	    (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
688 	    /*
689 	     * default case, just copy !
690 	     */
691 	    *out++ = *cur;
692 	} else if (*cur >= 0x80) {
693 	    if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
694 		/*
695 		 * Bjørn Reese <br@sseusa.com> provided the patch
696 	        xmlChar xc;
697 	        xc = (*cur & 0x3F) << 6;
698 	        if (cur[1] != 0) {
699 		    xc += *(++cur) & 0x3F;
700 		    *out++ = xc;
701 	        } else
702 		 */
703 		*out++ = *cur;
704 	    } else {
705 		/*
706 		 * We assume we have UTF-8 input.
707 		 * It must match either:
708 		 *   110xxxxx 10xxxxxx
709 		 *   1110xxxx 10xxxxxx 10xxxxxx
710 		 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
711 		 * That is:
712 		 *   cur[0] is 11xxxxxx
713 		 *   cur[1] is 10xxxxxx
714 		 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
715 		 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
716 		 *   cur[0] is not 11111xxx
717 		 */
718 		char buf[11], *ptr;
719 		int val = 0, l = 1;
720 
721 		if (((cur[0] & 0xC0) != 0xC0) ||
722 		    ((cur[1] & 0xC0) != 0x80) ||
723 		    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
724 		    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
725 		    (((cur[0] & 0xF8) == 0xF8))) {
726 		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
727 			    "xmlEncodeEntities: input not UTF-8");
728 		    if (doc != NULL)
729 			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
730 		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
731 		    buf[sizeof(buf) - 1] = 0;
732 		    ptr = buf;
733 		    while (*ptr != 0) *out++ = *ptr++;
734 		    cur++;
735 		    continue;
736 		} else if (*cur < 0xE0) {
737                     val = (cur[0]) & 0x1F;
738 		    val <<= 6;
739 		    val |= (cur[1]) & 0x3F;
740 		    l = 2;
741 		} else if (*cur < 0xF0) {
742                     val = (cur[0]) & 0x0F;
743 		    val <<= 6;
744 		    val |= (cur[1]) & 0x3F;
745 		    val <<= 6;
746 		    val |= (cur[2]) & 0x3F;
747 		    l = 3;
748 		} else if (*cur < 0xF8) {
749                     val = (cur[0]) & 0x07;
750 		    val <<= 6;
751 		    val |= (cur[1]) & 0x3F;
752 		    val <<= 6;
753 		    val |= (cur[2]) & 0x3F;
754 		    val <<= 6;
755 		    val |= (cur[3]) & 0x3F;
756 		    l = 4;
757 		}
758 		if ((l == 1) || (!IS_CHAR(val))) {
759 		    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
760 			"xmlEncodeEntities: char out of range\n");
761 		    if (doc != NULL)
762 			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
763 		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
764 		    buf[sizeof(buf) - 1] = 0;
765 		    ptr = buf;
766 		    while (*ptr != 0) *out++ = *ptr++;
767 		    cur++;
768 		    continue;
769 		}
770 		/*
771 		 * We could do multiple things here. Just save as a char ref
772 		 */
773 		snprintf(buf, sizeof(buf), "&#x%X;", val);
774 		buf[sizeof(buf) - 1] = 0;
775 		ptr = buf;
776 		while (*ptr != 0) *out++ = *ptr++;
777 		cur += l;
778 		continue;
779 	    }
780 	} else if (IS_BYTE_CHAR(*cur)) {
781 	    char buf[11], *ptr;
782 
783 	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
784 	    buf[sizeof(buf) - 1] = 0;
785             ptr = buf;
786 	    while (*ptr != 0) *out++ = *ptr++;
787 	}
788 	cur++;
789     }
790     *out = 0;
791     return(buffer);
792 
793 mem_error:
794     xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
795     xmlFree(buffer);
796     return(NULL);
797 }
798 
799 /**
800  * xmlEncodeAttributeEntities:
801  * @doc:  the document containing the string
802  * @input:  A string to convert to XML.
803  *
804  * Do a global encoding of a string, replacing the predefined entities
805  * and non ASCII values with their entities and CharRef counterparts for
806  * attribute values.
807  *
808  * Returns A newly allocated string with the substitution done.
809  */
810 xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc,const xmlChar * input)811 xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
812     return xmlEncodeEntitiesInternal(doc, input, 1);
813 }
814 
815 /**
816  * xmlEncodeEntitiesReentrant:
817  * @doc:  the document containing the string
818  * @input:  A string to convert to XML.
819  *
820  * Do a global encoding of a string, replacing the predefined entities
821  * and non ASCII values with their entities and CharRef counterparts.
822  * Contrary to xmlEncodeEntities, this routine is reentrant, and result
823  * must be deallocated.
824  *
825  * Returns A newly allocated string with the substitution done.
826  */
827 xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc,const xmlChar * input)828 xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
829     return xmlEncodeEntitiesInternal(doc, input, 0);
830 }
831 
832 /**
833  * xmlEncodeSpecialChars:
834  * @doc:  the document containing the string
835  * @input:  A string to convert to XML.
836  *
837  * Do a global encoding of a string, replacing the predefined entities
838  * this routine is reentrant, and result must be deallocated.
839  *
840  * Returns A newly allocated string with the substitution done.
841  */
842 xmlChar *
xmlEncodeSpecialChars(const xmlDoc * doc ATTRIBUTE_UNUSED,const xmlChar * input)843 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
844     const xmlChar *cur = input;
845     xmlChar *buffer = NULL;
846     xmlChar *out = NULL;
847     size_t buffer_size = 0;
848     if (input == NULL) return(NULL);
849 
850     /*
851      * allocate an translation buffer.
852      */
853     buffer_size = 1000;
854     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
855     if (buffer == NULL) {
856         xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
857 	return(NULL);
858     }
859     out = buffer;
860 
861     while (*cur != '\0') {
862         size_t indx = out - buffer;
863         if (indx + 10 > buffer_size) {
864 
865 	    growBufferReentrant();
866 	    out = &buffer[indx];
867 	}
868 
869 	/*
870 	 * By default one have to encode at least '<', '>', '"' and '&' !
871 	 */
872 	if (*cur == '<') {
873 	    *out++ = '&';
874 	    *out++ = 'l';
875 	    *out++ = 't';
876 	    *out++ = ';';
877 	} else if (*cur == '>') {
878 	    *out++ = '&';
879 	    *out++ = 'g';
880 	    *out++ = 't';
881 	    *out++ = ';';
882 	} else if (*cur == '&') {
883 	    *out++ = '&';
884 	    *out++ = 'a';
885 	    *out++ = 'm';
886 	    *out++ = 'p';
887 	    *out++ = ';';
888 	} else if (*cur == '"') {
889 	    *out++ = '&';
890 	    *out++ = 'q';
891 	    *out++ = 'u';
892 	    *out++ = 'o';
893 	    *out++ = 't';
894 	    *out++ = ';';
895 	} else if (*cur == '\r') {
896 	    *out++ = '&';
897 	    *out++ = '#';
898 	    *out++ = '1';
899 	    *out++ = '3';
900 	    *out++ = ';';
901 	} else {
902 	    /*
903 	     * Works because on UTF-8, all extended sequences cannot
904 	     * result in bytes in the ASCII range.
905 	     */
906 	    *out++ = *cur;
907 	}
908 	cur++;
909     }
910     *out = 0;
911     return(buffer);
912 
913 mem_error:
914     xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
915     xmlFree(buffer);
916     return(NULL);
917 }
918 
919 /**
920  * xmlCreateEntitiesTable:
921  *
922  * create and initialize an empty entities hash table.
923  * This really doesn't make sense and should be deprecated
924  *
925  * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
926  */
927 xmlEntitiesTablePtr
xmlCreateEntitiesTable(void)928 xmlCreateEntitiesTable(void) {
929     return((xmlEntitiesTablePtr) xmlHashCreate(0));
930 }
931 
932 /**
933  * xmlFreeEntityWrapper:
934  * @entity:  An entity
935  * @name:  its name
936  *
937  * Deallocate the memory used by an entities in the hash table.
938  */
939 static void
xmlFreeEntityWrapper(void * entity,const xmlChar * name ATTRIBUTE_UNUSED)940 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
941     if (entity != NULL)
942 	xmlFreeEntity((xmlEntityPtr) entity);
943 }
944 
945 /**
946  * xmlFreeEntitiesTable:
947  * @table:  An entity table
948  *
949  * Deallocate the memory used by an entities hash table.
950  */
951 void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table)952 xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
953     xmlHashFree(table, xmlFreeEntityWrapper);
954 }
955 
956 #ifdef LIBXML_TREE_ENABLED
957 /**
958  * xmlCopyEntity:
959  * @ent:  An entity
960  *
961  * Build a copy of an entity
962  *
963  * Returns the new xmlEntitiesPtr or NULL in case of error.
964  */
965 static void *
xmlCopyEntity(void * payload,const xmlChar * name ATTRIBUTE_UNUSED)966 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
967     xmlEntityPtr ent = (xmlEntityPtr) payload;
968     xmlEntityPtr cur;
969 
970     cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
971     if (cur == NULL) {
972         xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
973 	return(NULL);
974     }
975     memset(cur, 0, sizeof(xmlEntity));
976     cur->type = XML_ENTITY_DECL;
977 
978     cur->etype = ent->etype;
979     if (ent->name != NULL)
980 	cur->name = xmlStrdup(ent->name);
981     if (ent->ExternalID != NULL)
982 	cur->ExternalID = xmlStrdup(ent->ExternalID);
983     if (ent->SystemID != NULL)
984 	cur->SystemID = xmlStrdup(ent->SystemID);
985     if (ent->content != NULL)
986 	cur->content = xmlStrdup(ent->content);
987     if (ent->orig != NULL)
988 	cur->orig = xmlStrdup(ent->orig);
989     if (ent->URI != NULL)
990 	cur->URI = xmlStrdup(ent->URI);
991     return(cur);
992 }
993 
994 /**
995  * xmlCopyEntitiesTable:
996  * @table:  An entity table
997  *
998  * Build a copy of an entity table.
999  *
1000  * Returns the new xmlEntitiesTablePtr or NULL in case of error.
1001  */
1002 xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table)1003 xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
1004     return(xmlHashCopy(table, xmlCopyEntity));
1005 }
1006 #endif /* LIBXML_TREE_ENABLED */
1007 
1008 #ifdef LIBXML_OUTPUT_ENABLED
1009 
1010 /**
1011  * xmlDumpEntityContent:
1012  * @buf:  An XML buffer.
1013  * @content:  The entity content.
1014  *
1015  * This will dump the quoted string value, taking care of the special
1016  * treatment required by %
1017  */
1018 static void
xmlDumpEntityContent(xmlBufferPtr buf,const xmlChar * content)1019 xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
1020     if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return;
1021     if (xmlStrchr(content, '%')) {
1022         const xmlChar * base, *cur;
1023 
1024 	xmlBufferCCat(buf, "\"");
1025 	base = cur = content;
1026 	while (*cur != 0) {
1027 	    if (*cur == '"') {
1028 		if (base != cur)
1029 		    xmlBufferAdd(buf, base, cur - base);
1030 		xmlBufferAdd(buf, BAD_CAST "&quot;", 6);
1031 		cur++;
1032 		base = cur;
1033 	    } else if (*cur == '%') {
1034 		if (base != cur)
1035 		    xmlBufferAdd(buf, base, cur - base);
1036 		xmlBufferAdd(buf, BAD_CAST "&#x25;", 6);
1037 		cur++;
1038 		base = cur;
1039 	    } else {
1040 		cur++;
1041 	    }
1042 	}
1043 	if (base != cur)
1044 	    xmlBufferAdd(buf, base, cur - base);
1045 	xmlBufferCCat(buf, "\"");
1046     } else {
1047         xmlBufferWriteQuotedString(buf, content);
1048     }
1049 }
1050 
1051 /**
1052  * xmlDumpEntityDecl:
1053  * @buf:  An XML buffer.
1054  * @ent:  An entity table
1055  *
1056  * This will dump the content of the entity table as an XML DTD definition
1057  */
1058 void
xmlDumpEntityDecl(xmlBufferPtr buf,xmlEntityPtr ent)1059 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
1060     if ((buf == NULL) || (ent == NULL)) return;
1061     switch (ent->etype) {
1062 	case XML_INTERNAL_GENERAL_ENTITY:
1063 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1064 	    xmlBufferWriteCHAR(buf, ent->name);
1065 	    xmlBufferWriteChar(buf, " ");
1066 	    if (ent->orig != NULL)
1067 		xmlBufferWriteQuotedString(buf, ent->orig);
1068 	    else
1069 		xmlDumpEntityContent(buf, ent->content);
1070 	    xmlBufferWriteChar(buf, ">\n");
1071 	    break;
1072 	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1073 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1074 	    xmlBufferWriteCHAR(buf, ent->name);
1075 	    if (ent->ExternalID != NULL) {
1076 		 xmlBufferWriteChar(buf, " PUBLIC ");
1077 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1078 		 xmlBufferWriteChar(buf, " ");
1079 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1080 	    } else {
1081 		 xmlBufferWriteChar(buf, " SYSTEM ");
1082 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1083 	    }
1084 	    xmlBufferWriteChar(buf, ">\n");
1085 	    break;
1086 	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1087 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1088 	    xmlBufferWriteCHAR(buf, ent->name);
1089 	    if (ent->ExternalID != NULL) {
1090 		 xmlBufferWriteChar(buf, " PUBLIC ");
1091 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1092 		 xmlBufferWriteChar(buf, " ");
1093 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1094 	    } else {
1095 		 xmlBufferWriteChar(buf, " SYSTEM ");
1096 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1097 	    }
1098 	    if (ent->content != NULL) { /* Should be true ! */
1099 		xmlBufferWriteChar(buf, " NDATA ");
1100 		if (ent->orig != NULL)
1101 		    xmlBufferWriteCHAR(buf, ent->orig);
1102 		else
1103 		    xmlBufferWriteCHAR(buf, ent->content);
1104 	    }
1105 	    xmlBufferWriteChar(buf, ">\n");
1106 	    break;
1107 	case XML_INTERNAL_PARAMETER_ENTITY:
1108 	    xmlBufferWriteChar(buf, "<!ENTITY % ");
1109 	    xmlBufferWriteCHAR(buf, ent->name);
1110 	    xmlBufferWriteChar(buf, " ");
1111 	    if (ent->orig == NULL)
1112 		xmlDumpEntityContent(buf, ent->content);
1113 	    else
1114 		xmlBufferWriteQuotedString(buf, ent->orig);
1115 	    xmlBufferWriteChar(buf, ">\n");
1116 	    break;
1117 	case XML_EXTERNAL_PARAMETER_ENTITY:
1118 	    xmlBufferWriteChar(buf, "<!ENTITY % ");
1119 	    xmlBufferWriteCHAR(buf, ent->name);
1120 	    if (ent->ExternalID != NULL) {
1121 		 xmlBufferWriteChar(buf, " PUBLIC ");
1122 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1123 		 xmlBufferWriteChar(buf, " ");
1124 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1125 	    } else {
1126 		 xmlBufferWriteChar(buf, " SYSTEM ");
1127 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1128 	    }
1129 	    xmlBufferWriteChar(buf, ">\n");
1130 	    break;
1131 	default:
1132 	    xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
1133 		"xmlDumpEntitiesDecl: internal: unknown type entity type");
1134     }
1135 }
1136 
1137 /**
1138  * xmlDumpEntityDeclScan:
1139  * @ent:  An entity table
1140  * @buf:  An XML buffer.
1141  *
1142  * When using the hash table scan function, arguments need to be reversed
1143  */
1144 static void
xmlDumpEntityDeclScan(void * ent,void * buf,const xmlChar * name ATTRIBUTE_UNUSED)1145 xmlDumpEntityDeclScan(void *ent, void *buf,
1146                       const xmlChar *name ATTRIBUTE_UNUSED) {
1147     xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
1148 }
1149 
1150 /**
1151  * xmlDumpEntitiesTable:
1152  * @buf:  An XML buffer.
1153  * @table:  An entity table
1154  *
1155  * This will dump the content of the entity table as an XML DTD definition
1156  */
1157 void
xmlDumpEntitiesTable(xmlBufferPtr buf,xmlEntitiesTablePtr table)1158 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1159     xmlHashScan(table, xmlDumpEntityDeclScan, buf);
1160 }
1161 #endif /* LIBXML_OUTPUT_ENABLED */
1162 #define bottom_entities
1163 #include "elfgcchack.h"
1164