/*
 * entities.c : implementation for the XML entities handling
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13
14 #define IN_LIBXML
15 #include "libxml.h"
16
17 #include <string.h>
18 #ifdef HAVE_STDLIB_H
19 #include <stdlib.h>
20 #endif
21 #include <libxml/xmlmemory.h>
22 #include <libxml/hash.h>
23 #include <libxml/entities.h>
24 #include <libxml/parser.h>
25 #include <libxml/parserInternals.h>
26 #include <libxml/xmlerror.h>
27 #include <libxml/globals.h>
28 #include <libxml/dict.h>
29
30 #include "save.h"
31
/*
 * The XML predefined entities.
 */
35
36 static xmlEntity xmlEntityLt = {
37 NULL, XML_ENTITY_DECL, BAD_CAST "lt",
38 NULL, NULL, NULL, NULL, NULL, NULL,
39 BAD_CAST "<", BAD_CAST "<", 1,
40 XML_INTERNAL_PREDEFINED_ENTITY,
41 NULL, NULL, NULL, NULL, 0, 1
42 };
43 static xmlEntity xmlEntityGt = {
44 NULL, XML_ENTITY_DECL, BAD_CAST "gt",
45 NULL, NULL, NULL, NULL, NULL, NULL,
46 BAD_CAST ">", BAD_CAST ">", 1,
47 XML_INTERNAL_PREDEFINED_ENTITY,
48 NULL, NULL, NULL, NULL, 0, 1
49 };
50 static xmlEntity xmlEntityAmp = {
51 NULL, XML_ENTITY_DECL, BAD_CAST "amp",
52 NULL, NULL, NULL, NULL, NULL, NULL,
53 BAD_CAST "&", BAD_CAST "&", 1,
54 XML_INTERNAL_PREDEFINED_ENTITY,
55 NULL, NULL, NULL, NULL, 0, 1
56 };
57 static xmlEntity xmlEntityQuot = {
58 NULL, XML_ENTITY_DECL, BAD_CAST "quot",
59 NULL, NULL, NULL, NULL, NULL, NULL,
60 BAD_CAST "\"", BAD_CAST "\"", 1,
61 XML_INTERNAL_PREDEFINED_ENTITY,
62 NULL, NULL, NULL, NULL, 0, 1
63 };
64 static xmlEntity xmlEntityApos = {
65 NULL, XML_ENTITY_DECL, BAD_CAST "apos",
66 NULL, NULL, NULL, NULL, NULL, NULL,
67 BAD_CAST "'", BAD_CAST "'", 1,
68 XML_INTERNAL_PREDEFINED_ENTITY,
69 NULL, NULL, NULL, NULL, 0, 1
70 };
71
72 /**
73 * xmlEntitiesErrMemory:
74 * @extra: extra information
75 *
76 * Handle an out of memory condition
77 */
78 static void
xmlEntitiesErrMemory(const char * extra)79 xmlEntitiesErrMemory(const char *extra)
80 {
81 __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
82 }
83
84 /**
85 * xmlEntitiesErr:
86 * @code: the error code
87 * @msg: the message
88 *
89 * Handle an out of memory condition
90 */
91 static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesErr(xmlParserErrors code,const char * msg)92 xmlEntitiesErr(xmlParserErrors code, const char *msg)
93 {
94 __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
95 }
96
97 /*
98 * xmlFreeEntity : clean-up an entity record.
99 */
100 static void
xmlFreeEntity(xmlEntityPtr entity)101 xmlFreeEntity(xmlEntityPtr entity)
102 {
103 xmlDictPtr dict = NULL;
104
105 if (entity == NULL)
106 return;
107
108 if (entity->doc != NULL)
109 dict = entity->doc->dict;
110
111
112 if ((entity->children) && (entity->owner == 1) &&
113 (entity == (xmlEntityPtr) entity->children->parent))
114 xmlFreeNodeList(entity->children);
115 if (dict != NULL) {
116 if ((entity->name != NULL) && (!xmlDictOwns(dict, entity->name)))
117 xmlFree((char *) entity->name);
118 if ((entity->ExternalID != NULL) &&
119 (!xmlDictOwns(dict, entity->ExternalID)))
120 xmlFree((char *) entity->ExternalID);
121 if ((entity->SystemID != NULL) &&
122 (!xmlDictOwns(dict, entity->SystemID)))
123 xmlFree((char *) entity->SystemID);
124 if ((entity->URI != NULL) && (!xmlDictOwns(dict, entity->URI)))
125 xmlFree((char *) entity->URI);
126 if ((entity->content != NULL)
127 && (!xmlDictOwns(dict, entity->content)))
128 xmlFree((char *) entity->content);
129 if ((entity->orig != NULL) && (!xmlDictOwns(dict, entity->orig)))
130 xmlFree((char *) entity->orig);
131 } else {
132 if (entity->name != NULL)
133 xmlFree((char *) entity->name);
134 if (entity->ExternalID != NULL)
135 xmlFree((char *) entity->ExternalID);
136 if (entity->SystemID != NULL)
137 xmlFree((char *) entity->SystemID);
138 if (entity->URI != NULL)
139 xmlFree((char *) entity->URI);
140 if (entity->content != NULL)
141 xmlFree((char *) entity->content);
142 if (entity->orig != NULL)
143 xmlFree((char *) entity->orig);
144 }
145 xmlFree(entity);
146 }
147
148 /*
149 * xmlCreateEntity:
150 *
151 * internal routine doing the entity node structures allocations
152 */
153 static xmlEntityPtr
xmlCreateEntity(xmlDictPtr dict,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)154 xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
155 const xmlChar *ExternalID, const xmlChar *SystemID,
156 const xmlChar *content) {
157 xmlEntityPtr ret;
158
159 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
160 if (ret == NULL) {
161 xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
162 return(NULL);
163 }
164 memset(ret, 0, sizeof(xmlEntity));
165 ret->type = XML_ENTITY_DECL;
166 ret->checked = 0;
167
168 /*
169 * fill the structure.
170 */
171 ret->etype = (xmlEntityType) type;
172 if (dict == NULL) {
173 ret->name = xmlStrdup(name);
174 if (ExternalID != NULL)
175 ret->ExternalID = xmlStrdup(ExternalID);
176 if (SystemID != NULL)
177 ret->SystemID = xmlStrdup(SystemID);
178 } else {
179 ret->name = xmlDictLookup(dict, name, -1);
180 if (ExternalID != NULL)
181 ret->ExternalID = xmlDictLookup(dict, ExternalID, -1);
182 if (SystemID != NULL)
183 ret->SystemID = xmlDictLookup(dict, SystemID, -1);
184 }
185 if (content != NULL) {
186 ret->length = xmlStrlen(content);
187 if ((dict != NULL) && (ret->length < 5))
188 ret->content = (xmlChar *)
189 xmlDictLookup(dict, content, ret->length);
190 else
191 ret->content = xmlStrndup(content, ret->length);
192 } else {
193 ret->length = 0;
194 ret->content = NULL;
195 }
196 ret->URI = NULL; /* to be computed by the layer knowing
197 the defining entity */
198 ret->orig = NULL;
199 ret->owner = 0;
200
201 return(ret);
202 }
203
204 /*
205 * xmlAddEntity : register a new entity for an entities table.
206 */
207 static xmlEntityPtr
xmlAddEntity(xmlDtdPtr dtd,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)208 xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
209 const xmlChar *ExternalID, const xmlChar *SystemID,
210 const xmlChar *content) {
211 xmlDictPtr dict = NULL;
212 xmlEntitiesTablePtr table = NULL;
213 xmlEntityPtr ret, predef;
214
215 if (name == NULL)
216 return(NULL);
217 if (dtd == NULL)
218 return(NULL);
219 if (dtd->doc != NULL)
220 dict = dtd->doc->dict;
221
222 switch (type) {
223 case XML_INTERNAL_GENERAL_ENTITY:
224 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
225 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
226 predef = xmlGetPredefinedEntity(name);
227 if (predef != NULL) {
228 int valid = 0;
229
230 /* 4.6 Predefined Entities */
231 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
232 (content != NULL)) {
233 int c = predef->content[0];
234
235 if (((content[0] == c) && (content[1] == 0)) &&
236 ((c == '>') || (c == '\'') || (c == '"'))) {
237 valid = 1;
238 } else if ((content[0] == '&') && (content[1] == '#')) {
239 if (content[2] == 'x') {
240 xmlChar *hex = BAD_CAST "0123456789ABCDEF";
241 xmlChar ref[] = "00;";
242
243 ref[0] = hex[c / 16 % 16];
244 ref[1] = hex[c % 16];
245 if (xmlStrcasecmp(&content[3], ref) == 0)
246 valid = 1;
247 } else {
248 xmlChar ref[] = "00;";
249
250 ref[0] = '0' + c / 10 % 10;
251 ref[1] = '0' + c % 10;
252 if (xmlStrEqual(&content[2], ref))
253 valid = 1;
254 }
255 }
256 }
257 if (!valid) {
258 xmlEntitiesErr(XML_ERR_ENTITY_PROCESSING,
259 "xmlAddEntity: invalid redeclaration of predefined"
260 " entity");
261 return(NULL);
262 }
263 }
264 if (dtd->entities == NULL)
265 dtd->entities = xmlHashCreateDict(0, dict);
266 table = dtd->entities;
267 break;
268 case XML_INTERNAL_PARAMETER_ENTITY:
269 case XML_EXTERNAL_PARAMETER_ENTITY:
270 if (dtd->pentities == NULL)
271 dtd->pentities = xmlHashCreateDict(0, dict);
272 table = dtd->pentities;
273 break;
274 case XML_INTERNAL_PREDEFINED_ENTITY:
275 return(NULL);
276 }
277 if (table == NULL)
278 return(NULL);
279 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
280 if (ret == NULL)
281 return(NULL);
282 ret->doc = dtd->doc;
283
284 if (xmlHashAddEntry(table, name, ret)) {
285 /*
286 * entity was already defined at another level.
287 */
288 xmlFreeEntity(ret);
289 return(NULL);
290 }
291 return(ret);
292 }
293
294 /**
295 * xmlGetPredefinedEntity:
296 * @name: the entity name
297 *
298 * Check whether this name is an predefined entity.
299 *
300 * Returns NULL if not, otherwise the entity
301 */
302 xmlEntityPtr
xmlGetPredefinedEntity(const xmlChar * name)303 xmlGetPredefinedEntity(const xmlChar *name) {
304 if (name == NULL) return(NULL);
305 switch (name[0]) {
306 case 'l':
307 if (xmlStrEqual(name, BAD_CAST "lt"))
308 return(&xmlEntityLt);
309 break;
310 case 'g':
311 if (xmlStrEqual(name, BAD_CAST "gt"))
312 return(&xmlEntityGt);
313 break;
314 case 'a':
315 if (xmlStrEqual(name, BAD_CAST "amp"))
316 return(&xmlEntityAmp);
317 if (xmlStrEqual(name, BAD_CAST "apos"))
318 return(&xmlEntityApos);
319 break;
320 case 'q':
321 if (xmlStrEqual(name, BAD_CAST "quot"))
322 return(&xmlEntityQuot);
323 break;
324 default:
325 break;
326 }
327 return(NULL);
328 }
329
330 /**
331 * xmlAddDtdEntity:
332 * @doc: the document
333 * @name: the entity name
334 * @type: the entity type XML_xxx_yyy_ENTITY
335 * @ExternalID: the entity external ID if available
336 * @SystemID: the entity system ID if available
337 * @content: the entity content
338 *
339 * Register a new entity for this document DTD external subset.
340 *
341 * Returns a pointer to the entity or NULL in case of error
342 */
343 xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)344 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
345 const xmlChar *ExternalID, const xmlChar *SystemID,
346 const xmlChar *content) {
347 xmlEntityPtr ret;
348 xmlDtdPtr dtd;
349
350 if (doc == NULL) {
351 xmlEntitiesErr(XML_DTD_NO_DOC,
352 "xmlAddDtdEntity: document is NULL");
353 return(NULL);
354 }
355 if (doc->extSubset == NULL) {
356 xmlEntitiesErr(XML_DTD_NO_DTD,
357 "xmlAddDtdEntity: document without external subset");
358 return(NULL);
359 }
360 dtd = doc->extSubset;
361 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
362 if (ret == NULL) return(NULL);
363
364 /*
365 * Link it to the DTD
366 */
367 ret->parent = dtd;
368 ret->doc = dtd->doc;
369 if (dtd->last == NULL) {
370 dtd->children = dtd->last = (xmlNodePtr) ret;
371 } else {
372 dtd->last->next = (xmlNodePtr) ret;
373 ret->prev = dtd->last;
374 dtd->last = (xmlNodePtr) ret;
375 }
376 return(ret);
377 }
378
379 /**
380 * xmlAddDocEntity:
381 * @doc: the document
382 * @name: the entity name
383 * @type: the entity type XML_xxx_yyy_ENTITY
384 * @ExternalID: the entity external ID if available
385 * @SystemID: the entity system ID if available
386 * @content: the entity content
387 *
388 * Register a new entity for this document.
389 *
390 * Returns a pointer to the entity or NULL in case of error
391 */
392 xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)393 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
394 const xmlChar *ExternalID, const xmlChar *SystemID,
395 const xmlChar *content) {
396 xmlEntityPtr ret;
397 xmlDtdPtr dtd;
398
399 if (doc == NULL) {
400 xmlEntitiesErr(XML_DTD_NO_DOC,
401 "xmlAddDocEntity: document is NULL");
402 return(NULL);
403 }
404 if (doc->intSubset == NULL) {
405 xmlEntitiesErr(XML_DTD_NO_DTD,
406 "xmlAddDocEntity: document without internal subset");
407 return(NULL);
408 }
409 dtd = doc->intSubset;
410 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
411 if (ret == NULL) return(NULL);
412
413 /*
414 * Link it to the DTD
415 */
416 ret->parent = dtd;
417 ret->doc = dtd->doc;
418 if (dtd->last == NULL) {
419 dtd->children = dtd->last = (xmlNodePtr) ret;
420 } else {
421 dtd->last->next = (xmlNodePtr) ret;
422 ret->prev = dtd->last;
423 dtd->last = (xmlNodePtr) ret;
424 }
425 return(ret);
426 }
427
428 /**
429 * xmlNewEntity:
430 * @doc: the document
431 * @name: the entity name
432 * @type: the entity type XML_xxx_yyy_ENTITY
433 * @ExternalID: the entity external ID if available
434 * @SystemID: the entity system ID if available
435 * @content: the entity content
436 *
437 * Create a new entity, this differs from xmlAddDocEntity() that if
438 * the document is NULL or has no internal subset defined, then an
439 * unlinked entity structure will be returned, it is then the responsibility
440 * of the caller to link it to the document later or free it when not needed
441 * anymore.
442 *
443 * Returns a pointer to the entity or NULL in case of error
444 */
445 xmlEntityPtr
xmlNewEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)446 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
447 const xmlChar *ExternalID, const xmlChar *SystemID,
448 const xmlChar *content) {
449 xmlEntityPtr ret;
450 xmlDictPtr dict;
451
452 if ((doc != NULL) && (doc->intSubset != NULL)) {
453 return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
454 }
455 if (doc != NULL)
456 dict = doc->dict;
457 else
458 dict = NULL;
459 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
460 if (ret == NULL)
461 return(NULL);
462 ret->doc = doc;
463 return(ret);
464 }
465
466 /**
467 * xmlGetEntityFromTable:
468 * @table: an entity table
469 * @name: the entity name
470 * @parameter: look for parameter entities
471 *
472 * Do an entity lookup in the table.
473 * returns the corresponding parameter entity, if found.
474 *
475 * Returns A pointer to the entity structure or NULL if not found.
476 */
477 static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table,const xmlChar * name)478 xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
479 return((xmlEntityPtr) xmlHashLookup(table, name));
480 }
481
482 /**
483 * xmlGetParameterEntity:
484 * @doc: the document referencing the entity
485 * @name: the entity name
486 *
487 * Do an entity lookup in the internal and external subsets and
488 * returns the corresponding parameter entity, if found.
489 *
490 * Returns A pointer to the entity structure or NULL if not found.
491 */
492 xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc,const xmlChar * name)493 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
494 xmlEntitiesTablePtr table;
495 xmlEntityPtr ret;
496
497 if (doc == NULL)
498 return(NULL);
499 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
500 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
501 ret = xmlGetEntityFromTable(table, name);
502 if (ret != NULL)
503 return(ret);
504 }
505 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
506 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
507 return(xmlGetEntityFromTable(table, name));
508 }
509 return(NULL);
510 }
511
512 /**
513 * xmlGetDtdEntity:
514 * @doc: the document referencing the entity
515 * @name: the entity name
516 *
517 * Do an entity lookup in the DTD entity hash table and
518 * returns the corresponding entity, if found.
519 * Note: the first argument is the document node, not the DTD node.
520 *
521 * Returns A pointer to the entity structure or NULL if not found.
522 */
523 xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc,const xmlChar * name)524 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
525 xmlEntitiesTablePtr table;
526
527 if (doc == NULL)
528 return(NULL);
529 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
530 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
531 return(xmlGetEntityFromTable(table, name));
532 }
533 return(NULL);
534 }
535
536 /**
537 * xmlGetDocEntity:
538 * @doc: the document referencing the entity
539 * @name: the entity name
540 *
541 * Do an entity lookup in the document entity hash table and
542 * returns the corresponding entity, otherwise a lookup is done
543 * in the predefined entities too.
544 *
545 * Returns A pointer to the entity structure or NULL if not found.
546 */
547 xmlEntityPtr
xmlGetDocEntity(const xmlDoc * doc,const xmlChar * name)548 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
549 xmlEntityPtr cur;
550 xmlEntitiesTablePtr table;
551
552 if (doc != NULL) {
553 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
554 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
555 cur = xmlGetEntityFromTable(table, name);
556 if (cur != NULL)
557 return(cur);
558 }
559 if (doc->standalone != 1) {
560 if ((doc->extSubset != NULL) &&
561 (doc->extSubset->entities != NULL)) {
562 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
563 cur = xmlGetEntityFromTable(table, name);
564 if (cur != NULL)
565 return(cur);
566 }
567 }
568 }
569 return(xmlGetPredefinedEntity(name));
570 }
571
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation. The expansion relies on names of the function
 * using it: an "xmlChar *buffer", a "size_t buffer_size" and a
 * "mem_error" label, which is jumped to when the size computation wraps
 * around or when the reallocation fails (buffer is left untouched then).
 * Any pointer into the old buffer has to be recomputed by the caller.
 *
 * Wrapped in do { } while (0) so that "growBufferReentrant();" is a
 * single statement, usable in an unbraced if/else.
 */
#define growBufferReentrant() do {					\
    xmlChar *tmp;							\
    size_t new_size = buffer_size * 2;					\
    if (new_size < buffer_size) goto mem_error;				\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer_size = new_size;						\
} while (0)
584
585 /**
586 * xmlEncodeEntitiesInternal:
587 * @doc: the document containing the string
588 * @input: A string to convert to XML.
589 * @attr: are we handling an attribute value
590 *
591 * Do a global encoding of a string, replacing the predefined entities
592 * and non ASCII values with their entities and CharRef counterparts.
593 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
594 * must be deallocated.
595 *
596 * Returns A newly allocated string with the substitution done.
597 */
598 static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc,const xmlChar * input,int attr)599 xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
600 const xmlChar *cur = input;
601 xmlChar *buffer = NULL;
602 xmlChar *out = NULL;
603 size_t buffer_size = 0;
604 int html = 0;
605
606 if (input == NULL) return(NULL);
607 if (doc != NULL)
608 html = (doc->type == XML_HTML_DOCUMENT_NODE);
609
610 /*
611 * allocate an translation buffer.
612 */
613 buffer_size = 1000;
614 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
615 if (buffer == NULL) {
616 xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
617 return(NULL);
618 }
619 out = buffer;
620
621 while (*cur != '\0') {
622 size_t indx = out - buffer;
623 if (indx + 100 > buffer_size) {
624
625 growBufferReentrant();
626 out = &buffer[indx];
627 }
628
629 /*
630 * By default one have to encode at least '<', '>', '"' and '&' !
631 */
632 if (*cur == '<') {
633 const xmlChar *end;
634
635 /*
636 * Special handling of server side include in HTML attributes
637 */
638 if (html && attr &&
639 (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
640 ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
641 while (cur != end) {
642 *out++ = *cur++;
643 indx = out - buffer;
644 if (indx + 100 > buffer_size) {
645 growBufferReentrant();
646 out = &buffer[indx];
647 }
648 }
649 *out++ = *cur++;
650 *out++ = *cur++;
651 *out++ = *cur++;
652 continue;
653 }
654 *out++ = '&';
655 *out++ = 'l';
656 *out++ = 't';
657 *out++ = ';';
658 } else if (*cur == '>') {
659 *out++ = '&';
660 *out++ = 'g';
661 *out++ = 't';
662 *out++ = ';';
663 } else if (*cur == '&') {
664 /*
665 * Special handling of &{...} construct from HTML 4, see
666 * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
667 */
668 if (html && attr && (cur[1] == '{') &&
669 (strchr((const char *) cur, '}'))) {
670 while (*cur != '}') {
671 *out++ = *cur++;
672 indx = out - buffer;
673 if (indx + 100 > buffer_size) {
674 growBufferReentrant();
675 out = &buffer[indx];
676 }
677 }
678 *out++ = *cur++;
679 continue;
680 }
681 *out++ = '&';
682 *out++ = 'a';
683 *out++ = 'm';
684 *out++ = 'p';
685 *out++ = ';';
686 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
687 (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
688 /*
689 * default case, just copy !
690 */
691 *out++ = *cur;
692 } else if (*cur >= 0x80) {
693 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
694 /*
695 * Bjørn Reese <br@sseusa.com> provided the patch
696 xmlChar xc;
697 xc = (*cur & 0x3F) << 6;
698 if (cur[1] != 0) {
699 xc += *(++cur) & 0x3F;
700 *out++ = xc;
701 } else
702 */
703 *out++ = *cur;
704 } else {
705 /*
706 * We assume we have UTF-8 input.
707 * It must match either:
708 * 110xxxxx 10xxxxxx
709 * 1110xxxx 10xxxxxx 10xxxxxx
710 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
711 * That is:
712 * cur[0] is 11xxxxxx
713 * cur[1] is 10xxxxxx
714 * cur[2] is 10xxxxxx if cur[0] is 111xxxxx
715 * cur[3] is 10xxxxxx if cur[0] is 1111xxxx
716 * cur[0] is not 11111xxx
717 */
718 char buf[11], *ptr;
719 int val = 0, l = 1;
720
721 if (((cur[0] & 0xC0) != 0xC0) ||
722 ((cur[1] & 0xC0) != 0x80) ||
723 (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
724 (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
725 (((cur[0] & 0xF8) == 0xF8))) {
726 xmlEntitiesErr(XML_CHECK_NOT_UTF8,
727 "xmlEncodeEntities: input not UTF-8");
728 if (doc != NULL)
729 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
730 snprintf(buf, sizeof(buf), "&#%d;", *cur);
731 buf[sizeof(buf) - 1] = 0;
732 ptr = buf;
733 while (*ptr != 0) *out++ = *ptr++;
734 cur++;
735 continue;
736 } else if (*cur < 0xE0) {
737 val = (cur[0]) & 0x1F;
738 val <<= 6;
739 val |= (cur[1]) & 0x3F;
740 l = 2;
741 } else if (*cur < 0xF0) {
742 val = (cur[0]) & 0x0F;
743 val <<= 6;
744 val |= (cur[1]) & 0x3F;
745 val <<= 6;
746 val |= (cur[2]) & 0x3F;
747 l = 3;
748 } else if (*cur < 0xF8) {
749 val = (cur[0]) & 0x07;
750 val <<= 6;
751 val |= (cur[1]) & 0x3F;
752 val <<= 6;
753 val |= (cur[2]) & 0x3F;
754 val <<= 6;
755 val |= (cur[3]) & 0x3F;
756 l = 4;
757 }
758 if ((l == 1) || (!IS_CHAR(val))) {
759 xmlEntitiesErr(XML_ERR_INVALID_CHAR,
760 "xmlEncodeEntities: char out of range\n");
761 if (doc != NULL)
762 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
763 snprintf(buf, sizeof(buf), "&#%d;", *cur);
764 buf[sizeof(buf) - 1] = 0;
765 ptr = buf;
766 while (*ptr != 0) *out++ = *ptr++;
767 cur++;
768 continue;
769 }
770 /*
771 * We could do multiple things here. Just save as a char ref
772 */
773 snprintf(buf, sizeof(buf), "&#x%X;", val);
774 buf[sizeof(buf) - 1] = 0;
775 ptr = buf;
776 while (*ptr != 0) *out++ = *ptr++;
777 cur += l;
778 continue;
779 }
780 } else if (IS_BYTE_CHAR(*cur)) {
781 char buf[11], *ptr;
782
783 snprintf(buf, sizeof(buf), "&#%d;", *cur);
784 buf[sizeof(buf) - 1] = 0;
785 ptr = buf;
786 while (*ptr != 0) *out++ = *ptr++;
787 }
788 cur++;
789 }
790 *out = 0;
791 return(buffer);
792
793 mem_error:
794 xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
795 xmlFree(buffer);
796 return(NULL);
797 }
798
799 /**
800 * xmlEncodeAttributeEntities:
801 * @doc: the document containing the string
802 * @input: A string to convert to XML.
803 *
804 * Do a global encoding of a string, replacing the predefined entities
805 * and non ASCII values with their entities and CharRef counterparts for
806 * attribute values.
807 *
808 * Returns A newly allocated string with the substitution done.
809 */
810 xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc,const xmlChar * input)811 xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
812 return xmlEncodeEntitiesInternal(doc, input, 1);
813 }
814
815 /**
816 * xmlEncodeEntitiesReentrant:
817 * @doc: the document containing the string
818 * @input: A string to convert to XML.
819 *
820 * Do a global encoding of a string, replacing the predefined entities
821 * and non ASCII values with their entities and CharRef counterparts.
822 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
823 * must be deallocated.
824 *
825 * Returns A newly allocated string with the substitution done.
826 */
827 xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc,const xmlChar * input)828 xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
829 return xmlEncodeEntitiesInternal(doc, input, 0);
830 }
831
832 /**
833 * xmlEncodeSpecialChars:
834 * @doc: the document containing the string
835 * @input: A string to convert to XML.
836 *
837 * Do a global encoding of a string, replacing the predefined entities
838 * this routine is reentrant, and result must be deallocated.
839 *
840 * Returns A newly allocated string with the substitution done.
841 */
842 xmlChar *
xmlEncodeSpecialChars(const xmlDoc * doc ATTRIBUTE_UNUSED,const xmlChar * input)843 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
844 const xmlChar *cur = input;
845 xmlChar *buffer = NULL;
846 xmlChar *out = NULL;
847 size_t buffer_size = 0;
848 if (input == NULL) return(NULL);
849
850 /*
851 * allocate an translation buffer.
852 */
853 buffer_size = 1000;
854 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
855 if (buffer == NULL) {
856 xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
857 return(NULL);
858 }
859 out = buffer;
860
861 while (*cur != '\0') {
862 size_t indx = out - buffer;
863 if (indx + 10 > buffer_size) {
864
865 growBufferReentrant();
866 out = &buffer[indx];
867 }
868
869 /*
870 * By default one have to encode at least '<', '>', '"' and '&' !
871 */
872 if (*cur == '<') {
873 *out++ = '&';
874 *out++ = 'l';
875 *out++ = 't';
876 *out++ = ';';
877 } else if (*cur == '>') {
878 *out++ = '&';
879 *out++ = 'g';
880 *out++ = 't';
881 *out++ = ';';
882 } else if (*cur == '&') {
883 *out++ = '&';
884 *out++ = 'a';
885 *out++ = 'm';
886 *out++ = 'p';
887 *out++ = ';';
888 } else if (*cur == '"') {
889 *out++ = '&';
890 *out++ = 'q';
891 *out++ = 'u';
892 *out++ = 'o';
893 *out++ = 't';
894 *out++ = ';';
895 } else if (*cur == '\r') {
896 *out++ = '&';
897 *out++ = '#';
898 *out++ = '1';
899 *out++ = '3';
900 *out++ = ';';
901 } else {
902 /*
903 * Works because on UTF-8, all extended sequences cannot
904 * result in bytes in the ASCII range.
905 */
906 *out++ = *cur;
907 }
908 cur++;
909 }
910 *out = 0;
911 return(buffer);
912
913 mem_error:
914 xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
915 xmlFree(buffer);
916 return(NULL);
917 }
918
919 /**
920 * xmlCreateEntitiesTable:
921 *
922 * create and initialize an empty entities hash table.
923 * This really doesn't make sense and should be deprecated
924 *
925 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
926 */
927 xmlEntitiesTablePtr
xmlCreateEntitiesTable(void)928 xmlCreateEntitiesTable(void) {
929 return((xmlEntitiesTablePtr) xmlHashCreate(0));
930 }
931
932 /**
933 * xmlFreeEntityWrapper:
934 * @entity: An entity
935 * @name: its name
936 *
937 * Deallocate the memory used by an entities in the hash table.
938 */
939 static void
xmlFreeEntityWrapper(void * entity,const xmlChar * name ATTRIBUTE_UNUSED)940 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
941 if (entity != NULL)
942 xmlFreeEntity((xmlEntityPtr) entity);
943 }
944
945 /**
946 * xmlFreeEntitiesTable:
947 * @table: An entity table
948 *
949 * Deallocate the memory used by an entities hash table.
950 */
951 void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table)952 xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
953 xmlHashFree(table, xmlFreeEntityWrapper);
954 }
955
956 #ifdef LIBXML_TREE_ENABLED
957 /**
958 * xmlCopyEntity:
959 * @ent: An entity
960 *
961 * Build a copy of an entity
962 *
963 * Returns the new xmlEntitiesPtr or NULL in case of error.
964 */
965 static void *
xmlCopyEntity(void * payload,const xmlChar * name ATTRIBUTE_UNUSED)966 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
967 xmlEntityPtr ent = (xmlEntityPtr) payload;
968 xmlEntityPtr cur;
969
970 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
971 if (cur == NULL) {
972 xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
973 return(NULL);
974 }
975 memset(cur, 0, sizeof(xmlEntity));
976 cur->type = XML_ENTITY_DECL;
977
978 cur->etype = ent->etype;
979 if (ent->name != NULL)
980 cur->name = xmlStrdup(ent->name);
981 if (ent->ExternalID != NULL)
982 cur->ExternalID = xmlStrdup(ent->ExternalID);
983 if (ent->SystemID != NULL)
984 cur->SystemID = xmlStrdup(ent->SystemID);
985 if (ent->content != NULL)
986 cur->content = xmlStrdup(ent->content);
987 if (ent->orig != NULL)
988 cur->orig = xmlStrdup(ent->orig);
989 if (ent->URI != NULL)
990 cur->URI = xmlStrdup(ent->URI);
991 return(cur);
992 }
993
994 /**
995 * xmlCopyEntitiesTable:
996 * @table: An entity table
997 *
998 * Build a copy of an entity table.
999 *
1000 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
1001 */
1002 xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table)1003 xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
1004 return(xmlHashCopy(table, xmlCopyEntity));
1005 }
1006 #endif /* LIBXML_TREE_ENABLED */
1007
1008 #ifdef LIBXML_OUTPUT_ENABLED
1009
1010 /**
1011 * xmlDumpEntityContent:
1012 * @buf: An XML buffer.
1013 * @content: The entity content.
1014 *
1015 * This will dump the quoted string value, taking care of the special
1016 * treatment required by %
1017 */
1018 static void
xmlDumpEntityContent(xmlBufferPtr buf,const xmlChar * content)1019 xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
1020 if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return;
1021 if (xmlStrchr(content, '%')) {
1022 const xmlChar * base, *cur;
1023
1024 xmlBufferCCat(buf, "\"");
1025 base = cur = content;
1026 while (*cur != 0) {
1027 if (*cur == '"') {
1028 if (base != cur)
1029 xmlBufferAdd(buf, base, cur - base);
1030 xmlBufferAdd(buf, BAD_CAST """, 6);
1031 cur++;
1032 base = cur;
1033 } else if (*cur == '%') {
1034 if (base != cur)
1035 xmlBufferAdd(buf, base, cur - base);
1036 xmlBufferAdd(buf, BAD_CAST "%", 6);
1037 cur++;
1038 base = cur;
1039 } else {
1040 cur++;
1041 }
1042 }
1043 if (base != cur)
1044 xmlBufferAdd(buf, base, cur - base);
1045 xmlBufferCCat(buf, "\"");
1046 } else {
1047 xmlBufferWriteQuotedString(buf, content);
1048 }
1049 }
1050
1051 /**
1052 * xmlDumpEntityDecl:
1053 * @buf: An XML buffer.
1054 * @ent: An entity table
1055 *
1056 * This will dump the content of the entity table as an XML DTD definition
1057 */
1058 void
xmlDumpEntityDecl(xmlBufferPtr buf,xmlEntityPtr ent)1059 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
1060 if ((buf == NULL) || (ent == NULL)) return;
1061 switch (ent->etype) {
1062 case XML_INTERNAL_GENERAL_ENTITY:
1063 xmlBufferWriteChar(buf, "<!ENTITY ");
1064 xmlBufferWriteCHAR(buf, ent->name);
1065 xmlBufferWriteChar(buf, " ");
1066 if (ent->orig != NULL)
1067 xmlBufferWriteQuotedString(buf, ent->orig);
1068 else
1069 xmlDumpEntityContent(buf, ent->content);
1070 xmlBufferWriteChar(buf, ">\n");
1071 break;
1072 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1073 xmlBufferWriteChar(buf, "<!ENTITY ");
1074 xmlBufferWriteCHAR(buf, ent->name);
1075 if (ent->ExternalID != NULL) {
1076 xmlBufferWriteChar(buf, " PUBLIC ");
1077 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1078 xmlBufferWriteChar(buf, " ");
1079 xmlBufferWriteQuotedString(buf, ent->SystemID);
1080 } else {
1081 xmlBufferWriteChar(buf, " SYSTEM ");
1082 xmlBufferWriteQuotedString(buf, ent->SystemID);
1083 }
1084 xmlBufferWriteChar(buf, ">\n");
1085 break;
1086 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1087 xmlBufferWriteChar(buf, "<!ENTITY ");
1088 xmlBufferWriteCHAR(buf, ent->name);
1089 if (ent->ExternalID != NULL) {
1090 xmlBufferWriteChar(buf, " PUBLIC ");
1091 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1092 xmlBufferWriteChar(buf, " ");
1093 xmlBufferWriteQuotedString(buf, ent->SystemID);
1094 } else {
1095 xmlBufferWriteChar(buf, " SYSTEM ");
1096 xmlBufferWriteQuotedString(buf, ent->SystemID);
1097 }
1098 if (ent->content != NULL) { /* Should be true ! */
1099 xmlBufferWriteChar(buf, " NDATA ");
1100 if (ent->orig != NULL)
1101 xmlBufferWriteCHAR(buf, ent->orig);
1102 else
1103 xmlBufferWriteCHAR(buf, ent->content);
1104 }
1105 xmlBufferWriteChar(buf, ">\n");
1106 break;
1107 case XML_INTERNAL_PARAMETER_ENTITY:
1108 xmlBufferWriteChar(buf, "<!ENTITY % ");
1109 xmlBufferWriteCHAR(buf, ent->name);
1110 xmlBufferWriteChar(buf, " ");
1111 if (ent->orig == NULL)
1112 xmlDumpEntityContent(buf, ent->content);
1113 else
1114 xmlBufferWriteQuotedString(buf, ent->orig);
1115 xmlBufferWriteChar(buf, ">\n");
1116 break;
1117 case XML_EXTERNAL_PARAMETER_ENTITY:
1118 xmlBufferWriteChar(buf, "<!ENTITY % ");
1119 xmlBufferWriteCHAR(buf, ent->name);
1120 if (ent->ExternalID != NULL) {
1121 xmlBufferWriteChar(buf, " PUBLIC ");
1122 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1123 xmlBufferWriteChar(buf, " ");
1124 xmlBufferWriteQuotedString(buf, ent->SystemID);
1125 } else {
1126 xmlBufferWriteChar(buf, " SYSTEM ");
1127 xmlBufferWriteQuotedString(buf, ent->SystemID);
1128 }
1129 xmlBufferWriteChar(buf, ">\n");
1130 break;
1131 default:
1132 xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
1133 "xmlDumpEntitiesDecl: internal: unknown type entity type");
1134 }
1135 }
1136
1137 /**
1138 * xmlDumpEntityDeclScan:
1139 * @ent: An entity table
1140 * @buf: An XML buffer.
1141 *
1142 * When using the hash table scan function, arguments need to be reversed
1143 */
1144 static void
xmlDumpEntityDeclScan(void * ent,void * buf,const xmlChar * name ATTRIBUTE_UNUSED)1145 xmlDumpEntityDeclScan(void *ent, void *buf,
1146 const xmlChar *name ATTRIBUTE_UNUSED) {
1147 xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
1148 }
1149
1150 /**
1151 * xmlDumpEntitiesTable:
1152 * @buf: An XML buffer.
1153 * @table: An entity table
1154 *
1155 * This will dump the content of the entity table as an XML DTD definition
1156 */
1157 void
xmlDumpEntitiesTable(xmlBufferPtr buf,xmlEntitiesTablePtr table)1158 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1159 xmlHashScan(table, xmlDumpEntityDeclScan, buf);
1160 }
1161 #endif /* LIBXML_OUTPUT_ENABLED */
1162 #define bottom_entities
1163 #include "elfgcchack.h"
1164