1 /*
2  * encoding.c : implements the encoding conversion functions needed for XML
3  *
4  * Related specs:
5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8  * [ISO-8859-1]   ISO Latin-1 characters codes.
9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10  *                Worldwide Character Encoding -- Version 1.0", Addison-
11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12  *                described in Unicode Technical Report #4.
13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14  *                Information Interchange, ANSI X3.4-1986.
15  *
16  * See Copyright for the status of this software.
17  *
18  * daniel@veillard.com
19  *
20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21  */
22 
23 #define IN_LIBXML
24 #include "libxml.h"
25 
26 #include <string.h>
27 #include <limits.h>
28 
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
47 
48 #include "buf.h"
49 #include "enc.h"
50 
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53 
/*
 * One entry of the encoding alias table. xmlGetEncodingAlias() matches
 * a requested name against @alias and hands back @name.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias, *xmlCharEncodingAliasPtr;

/*
 * The alias table itself. xmlCharEncodingAliasesNb is the number of
 * entries in use; xmlCharEncodingAliasesMax is presumably the allocated
 * capacity (the code growing the table is not visible here).
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
64 
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66 #if 0
67 #define DEBUG_ENCODING  /* Define this to get encoding traces */
68 #endif
69 #else
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
72 #endif
73 #endif
74 
75 static int xmlLittleEndian = 1;
76 
77 /**
78  * xmlEncodingErrMemory:
79  * @extra:  extra information
80  *
81  * Handle an out of memory condition
82  */
83 static void
xmlEncodingErrMemory(const char * extra)84 xmlEncodingErrMemory(const char *extra)
85 {
86     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 }
88 
89 /**
90  * xmlErrEncoding:
91  * @error:  the error number
92  * @msg:  the error message
93  *
94  * n encoding error
95  */
96 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98 {
99     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100                     XML_FROM_I18N, error, XML_ERR_FATAL,
101                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102 }
103 
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
openIcuConverter(const char * name,int toUnicode)106 openIcuConverter(const char* name, int toUnicode)
107 {
108   UErrorCode status = U_ZERO_ERROR;
109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110   if (conv == NULL)
111     return NULL;
112 
113   conv->pivot_source = conv->pivot_buf;
114   conv->pivot_target = conv->pivot_buf;
115 
116   conv->uconv = ucnv_open(name, &status);
117   if (U_FAILURE(status))
118     goto error;
119 
120   status = U_ZERO_ERROR;
121   if (toUnicode) {
122     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123                         NULL, NULL, NULL, &status);
124   }
125   else {
126     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127                         NULL, NULL, NULL, &status);
128   }
129   if (U_FAILURE(status))
130     goto error;
131 
132   status = U_ZERO_ERROR;
133   conv->utf8 = ucnv_open("UTF-8", &status);
134   if (U_SUCCESS(status))
135     return conv;
136 
137 error:
138   if (conv->uconv)
139     ucnv_close(conv->uconv);
140   xmlFree(conv);
141   return NULL;
142 }
143 
144 static void
closeIcuConverter(uconv_t * conv)145 closeIcuConverter(uconv_t *conv)
146 {
147   if (conv != NULL) {
148     ucnv_close(conv->uconv);
149     ucnv_close(conv->utf8);
150     xmlFree(conv);
151   }
152 }
153 #endif /* LIBXML_ICU_ENABLED */
154 
155 /************************************************************************
156  *									*
157  *		Conversions To/From UTF8 encoding			*
158  *									*
159  ************************************************************************/
160 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged; as many
 * bytes are converted as fit in both @in and @out, so an output buffer
 * of any size (even a single byte) allows progress.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 *     When the conversion stops on a byte >= 0x80 both describe the part
 *     converted before that byte; they are left untouched when an
 *     argument is NULL.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int count;
    int i;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
        return(-1);

    /* One input byte yields exactly one output byte. */
    count = *inlen;
    if (count > *outlen)
        count = *outlen;

    for (i = 0; i < count; i++) {
        if (in[i] >= 0x80) {
            /* not an ASCII char: report what was converted so far */
            *outlen = i;
            *inlen = i;
            return(-1);
        }
        out[i] = in[i];
    }

    *outlen = i;
    *inlen = i;
    return(*outlen);
}
205 
206 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. A NULL @in is an initialization call and produces
 * nothing. A multi-byte sequence cut off by the end of @in is left
 * unconsumed so that it can be retried once more input is available.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 *     On a -2 return both describe the part converted before the
 *     offending sequence.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto failed;                /* no chance for this in Ascii */

        /* sequence truncated by the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not a continuation byte makes the sequence
             * malformed; fail instead of emitting the partial value.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto failed;                 /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
290 #endif /* LIBXML_OUTPUT_ENABLED */
291 
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, all others
 * become a two byte sequence. Conversion stops at the first character
 * that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    unsigned char *dstend;
    const unsigned char *src = in;
    const unsigned char *srcend;
    unsigned int ch;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstend = out + *outlen;
    srcend = in + (*inlen);

    while (src < srcend) {
        ch = *src;
        if (ch < 0x80) {
            /* one byte in, one byte out */
            if (dst >= dstend)
                break;
            *dst++ = ch;
        } else {
            /* 0x80..0xFF always expand to two bytes */
            if (dstend - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
340 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit
 * in both buffers, without looking at their content. A NULL @inb is an
 * initialization call and produces nothing.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or the number of bytes to copy is negative.
 *     The value of *inlenb and *outlen after a successful return is the
 *     number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* The copy is bounded by the smaller of the two lengths. */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(*outlen);
}
387 
388 
389 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A NULL @in is an initialization call and produces
 * nothing. Conversion stops early, without error, when @out is full or
 * when the last sequence of @in is incomplete.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead or continuation byte, or a character above 0xFF),
 *     or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 *     On a -2 return both describe the part converted before the
 *     offending sequence.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *done = in;
    const unsigned char *srcend;
    unsigned char *dst = out;
    const unsigned char *dstend;
    unsigned int code, byte;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcend = in + (*inlen);
    dstend = out + (*outlen);

    while (src < srcend) {
        byte = *src++;

        /* Classify the lead byte. */
        if (byte < 0x80) {
            code = byte;
            extra = 0;
        } else if ((byte < 0xC0) || (byte >= 0xF8)) {
            /* stray continuation byte, or a lead byte with no meaning here */
            goto failed;
        } else if (byte < 0xE0) {
            code = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            code = byte & 0x0F;
            extra = 2;
        } else {
            code = byte & 0x07;
            extra = 3;
        }

        /* Sequence cut by the end of the input: leave it unconsumed. */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80)
                goto failed;
            code = (code << 6) | (byte & 0x3F);
            extra--;
        }

        /* Only U+0000..U+00FF exist in ISO Latin 1. */
        if (code > 0xFF)
            goto failed;
        if (dst >= dstend)
            break;
        *dst++ = code;
        done = src;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return(*outlen);

failed:
    *outlen = dst - out;
    *inlen = done - in;
    return(-2);
}
479 #endif /* LIBXML_OUTPUT_ENABLED */
480 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so the
 * result depends neither on the alignment of @inb nor on the byte order
 * of the machine. A trailing odd byte is ignored.
 *
 * A character is only written when its whole UTF-8 sequence fits in the
 * remaining output, and a high surrogate at the very end of the input is
 * left unconsumed so that the pair can be completed by a later call.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL, or
 *     -2 if the transcoding fails (a high surrogate is not followed by a
 *     low surrogate).
 *     The value of *inlenb after return is the number of octets consumed
 *     and the value of *outlen the number of octets produced, except on
 *     a -1 return where both are left untouched.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (inend - in >= 2) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* pair split across two calls */
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a truncated UTF-8 sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            case 3:
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            default:
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
573 
574 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so the result depends neither on the alignment
 * of @outb nor on the byte order of the machine. A NULL @in is an
 * initialization call and produces nothing.
 *
 * Conversion stops early, without error, when the next character does
 * not fit in @outb or when the last sequence of @in is incomplete.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (bad lead or continuation
 *     byte, or a value above 0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed
 *     and the value of *outlen the number of octets produced; on a -2
 *     return both describe the part converted before the offending
 *     sequence.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto failed;                /* no chance for this in UTF-16 */

        /* sequence truncated by the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* fail rather than emit a value built from a partial sequence */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the range UTF-16 can represent */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
683 
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. A NULL @in is the initialization call: it writes
 * the two bytes 0xFF 0xFE (little endian Byte Order Mark) when @outb has
 * room for them. Any other call is handed to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, or -1 if lack of space, or -2
 *     if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen < 2) {
        /* no room for the mark: emit nothing */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
            "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
722 #endif /* LIBXML_OUTPUT_ENABLED */
723 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so the
 * result depends neither on the alignment of @inb nor on the byte order
 * of the machine. A trailing odd byte is ignored.
 *
 * A character is only written when its whole UTF-8 sequence fits in the
 * remaining output, and a high surrogate at the very end of the input is
 * left unconsumed so that the pair can be completed by a later call.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL, or
 *     -2 if the transcoding fails (a high surrogate is not followed by a
 *     low surrogate).
 * The value of *inlenb after return is the number of octets consumed
 *     and the value of *outlen the number of octets produced, except on
 *     a -1 return where both are left untouched.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* pair split across two calls */
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /*
         * Never emit a truncated UTF-8 sequence: if the character does
         * not fit completely it is neither written nor consumed.
         */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            case 3:
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
            default:
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
                break;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
815 
816 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so the result depends neither on the alignment
 * of @outb nor on the byte order of the machine. A NULL @in is an
 * initialization call and produces nothing.
 *
 * Conversion stops early, without error, when the next character does
 * not fit in @outb or when the last sequence of @in is incomplete.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (bad lead or continuation
 *     byte, or a value above 0x10FFFF).
 *     The value of *inlen after return is the number of octets consumed
 *     and the value of *outlen the number of octets (not 16-bit units)
 *     produced; on a -2 return both describe the part converted before
 *     the offending sequence.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else goto failed;                /* no chance for this in UTF-16 */

        /* sequence truncated by the end of the input: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* fail rather than emit a value built from a partial sequence */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the range UTF-16 can represent */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report the byte count, as on the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
922 #endif /* LIBXML_OUTPUT_ENABLED */
923 
924 /************************************************************************
925  *									*
926  *		Generic encoding handling routines			*
927  *									*
928  ************************************************************************/
929 
930 /**
931  * xmlDetectCharEncoding:
932  * @in:  a pointer to the first bytes of the XML entity, must be at least
933  *       2 bytes long (at least 4 if encoding is UTF4 variant).
934  * @len:  pointer to the length of the buffer
935  *
936  * Guess the encoding of the entity using the first bytes of the entity content
937  * according to the non-normative appendix F of the XML-1.0 recommendation.
938  *
939  * Returns one of the XML_CHAR_ENCODING_... values.
940  */
941 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)942 xmlDetectCharEncoding(const unsigned char* in, int len)
943 {
944     if (in == NULL)
945         return(XML_CHAR_ENCODING_NONE);
946     if (len >= 4) {
947 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
948 	    (in[2] == 0x00) && (in[3] == 0x3C))
949 	    return(XML_CHAR_ENCODING_UCS4BE);
950 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
951 	    (in[2] == 0x00) && (in[3] == 0x00))
952 	    return(XML_CHAR_ENCODING_UCS4LE);
953 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
954 	    (in[2] == 0x3C) && (in[3] == 0x00))
955 	    return(XML_CHAR_ENCODING_UCS4_2143);
956 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
957 	    (in[2] == 0x00) && (in[3] == 0x00))
958 	    return(XML_CHAR_ENCODING_UCS4_3412);
959 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
960 	    (in[2] == 0xA7) && (in[3] == 0x94))
961 	    return(XML_CHAR_ENCODING_EBCDIC);
962 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
963 	    (in[2] == 0x78) && (in[3] == 0x6D))
964 	    return(XML_CHAR_ENCODING_UTF8);
965 	/*
966 	 * Although not part of the recommendation, we also
967 	 * attempt an "auto-recognition" of UTF-16LE and
968 	 * UTF-16BE encodings.
969 	 */
970 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
971 	    (in[2] == 0x3F) && (in[3] == 0x00))
972 	    return(XML_CHAR_ENCODING_UTF16LE);
973 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
974 	    (in[2] == 0x00) && (in[3] == 0x3F))
975 	    return(XML_CHAR_ENCODING_UTF16BE);
976     }
977     if (len >= 3) {
978 	/*
979 	 * Errata on XML-1.0 June 20 2001
980 	 * We now allow an UTF8 encoded BOM
981 	 */
982 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
983 	    (in[2] == 0xBF))
984 	    return(XML_CHAR_ENCODING_UTF8);
985     }
986     /* For UTF-16 we can recognize by the BOM */
987     if (len >= 2) {
988 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
989 	    return(XML_CHAR_ENCODING_UTF16BE);
990 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
991 	    return(XML_CHAR_ENCODING_UTF16LE);
992     }
993     return(XML_CHAR_ENCODING_NONE);
994 }
995 
996 /**
997  * xmlCleanupEncodingAliases:
998  *
999  * Unregisters all aliases
1000  */
1001 void
xmlCleanupEncodingAliases(void)1002 xmlCleanupEncodingAliases(void) {
1003     int i;
1004 
1005     if (xmlCharEncodingAliases == NULL)
1006 	return;
1007 
1008     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1009 	if (xmlCharEncodingAliases[i].name != NULL)
1010 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1011 	if (xmlCharEncodingAliases[i].alias != NULL)
1012 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1013     }
1014     xmlCharEncodingAliasesNb = 0;
1015     xmlCharEncodingAliasesMax = 0;
1016     xmlFree(xmlCharEncodingAliases);
1017     xmlCharEncodingAliases = NULL;
1018 }
1019 
1020 /**
1021  * xmlGetEncodingAlias:
1022  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1023  *
1024  * Lookup an encoding name for the given alias.
1025  *
1026  * Returns NULL if not found, otherwise the original name
1027  */
1028 const char *
xmlGetEncodingAlias(const char * alias)1029 xmlGetEncodingAlias(const char *alias) {
1030     int i;
1031     char upper[100];
1032 
1033     if (alias == NULL)
1034 	return(NULL);
1035 
1036     if (xmlCharEncodingAliases == NULL)
1037 	return(NULL);
1038 
1039     for (i = 0;i < 99;i++) {
1040         upper[i] = toupper(alias[i]);
1041 	if (upper[i] == 0) break;
1042     }
1043     upper[i] = 0;
1044 
1045     /*
1046      * Walk down the list looking for a definition of the alias
1047      */
1048     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1049 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1050 	    return(xmlCharEncodingAliases[i].name);
1051 	}
1052     }
1053     return(NULL);
1054 }
1055 
1056 /**
1057  * xmlAddEncodingAlias:
1058  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1059  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1060  *
1061  * Registers an alias @alias for an encoding named @name. Existing alias
1062  * will be overwritten.
1063  *
1064  * Returns 0 in case of success, -1 in case of error
1065  */
1066 int
xmlAddEncodingAlias(const char * name,const char * alias)1067 xmlAddEncodingAlias(const char *name, const char *alias) {
1068     int i;
1069     char upper[100];
1070 
1071     if ((name == NULL) || (alias == NULL))
1072 	return(-1);
1073 
1074     for (i = 0;i < 99;i++) {
1075         upper[i] = toupper(alias[i]);
1076 	if (upper[i] == 0) break;
1077     }
1078     upper[i] = 0;
1079 
1080     if (xmlCharEncodingAliases == NULL) {
1081 	xmlCharEncodingAliasesNb = 0;
1082 	xmlCharEncodingAliasesMax = 20;
1083 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1084 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1085 	if (xmlCharEncodingAliases == NULL)
1086 	    return(-1);
1087     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1088 	xmlCharEncodingAliasesMax *= 2;
1089 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1090 	      xmlRealloc(xmlCharEncodingAliases,
1091 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1092     }
1093     /*
1094      * Walk down the list looking for a definition of the alias
1095      */
1096     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1097 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1098 	    /*
1099 	     * Replace the definition.
1100 	     */
1101 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1102 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1103 	    return(0);
1104 	}
1105     }
1106     /*
1107      * Add the definition
1108      */
1109     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1110     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1111     xmlCharEncodingAliasesNb++;
1112     return(0);
1113 }
1114 
1115 /**
1116  * xmlDelEncodingAlias:
1117  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1118  *
1119  * Unregisters an encoding alias @alias
1120  *
1121  * Returns 0 in case of success, -1 in case of error
1122  */
1123 int
xmlDelEncodingAlias(const char * alias)1124 xmlDelEncodingAlias(const char *alias) {
1125     int i;
1126 
1127     if (alias == NULL)
1128 	return(-1);
1129 
1130     if (xmlCharEncodingAliases == NULL)
1131 	return(-1);
1132     /*
1133      * Walk down the list looking for a definition of the alias
1134      */
1135     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1136 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1137 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1138 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1139 	    xmlCharEncodingAliasesNb--;
1140 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1141 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1142 	    return(0);
1143 	}
1144     }
1145     return(-1);
1146 }
1147 
1148 /**
1149  * xmlParseCharEncoding:
1150  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1151  *
1152  * Compare the string to the encoding schemes already known. Note
1153  * that the comparison is case insensitive accordingly to the section
1154  * [XML] 4.3.3 Character Encoding in Entities.
1155  *
1156  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1157  * if not recognized.
1158  */
1159 xmlCharEncoding
xmlParseCharEncoding(const char * name)1160 xmlParseCharEncoding(const char* name)
1161 {
1162     const char *alias;
1163     char upper[500];
1164     int i;
1165 
1166     if (name == NULL)
1167 	return(XML_CHAR_ENCODING_NONE);
1168 
1169     /*
1170      * Do the alias resolution
1171      */
1172     alias = xmlGetEncodingAlias(name);
1173     if (alias != NULL)
1174 	name = alias;
1175 
1176     for (i = 0;i < 499;i++) {
1177         upper[i] = toupper(name[i]);
1178 	if (upper[i] == 0) break;
1179     }
1180     upper[i] = 0;
1181 
1182     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1183     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1184     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1185 
1186     /*
1187      * NOTE: if we were able to parse this, the endianness of UTF16 is
1188      *       already found and in use
1189      */
1190     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1191     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1192 
1193     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1194     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1196 
1197     /*
1198      * NOTE: if we were able to parse this, the endianness of UCS4 is
1199      *       already found and in use
1200      */
1201     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1202     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1204 
1205 
1206     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1207     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1208     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1209 
1210     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1211     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1212     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1213 
1214     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1215     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1216     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1217     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1218     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1219     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1220     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1221 
1222     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1223     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1224     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1225 
1226 #ifdef DEBUG_ENCODING
1227     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1228 #endif
1229     return(XML_CHAR_ENCODING_ERROR);
1230 }
1231 
1232 /**
1233  * xmlGetCharEncodingName:
1234  * @enc:  the encoding
1235  *
1236  * The "canonical" name for XML encoding.
1237  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1238  * Section 4.3.3  Character Encoding in Entities
1239  *
1240  * Returns the canonical name for the given encoding
1241  */
1242 
1243 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1244 xmlGetCharEncodingName(xmlCharEncoding enc) {
1245     switch (enc) {
1246         case XML_CHAR_ENCODING_ERROR:
1247 	    return(NULL);
1248         case XML_CHAR_ENCODING_NONE:
1249 	    return(NULL);
1250         case XML_CHAR_ENCODING_UTF8:
1251 	    return("UTF-8");
1252         case XML_CHAR_ENCODING_UTF16LE:
1253 	    return("UTF-16");
1254         case XML_CHAR_ENCODING_UTF16BE:
1255 	    return("UTF-16");
1256         case XML_CHAR_ENCODING_EBCDIC:
1257             return("EBCDIC");
1258         case XML_CHAR_ENCODING_UCS4LE:
1259             return("ISO-10646-UCS-4");
1260         case XML_CHAR_ENCODING_UCS4BE:
1261             return("ISO-10646-UCS-4");
1262         case XML_CHAR_ENCODING_UCS4_2143:
1263             return("ISO-10646-UCS-4");
1264         case XML_CHAR_ENCODING_UCS4_3412:
1265             return("ISO-10646-UCS-4");
1266         case XML_CHAR_ENCODING_UCS2:
1267             return("ISO-10646-UCS-2");
1268         case XML_CHAR_ENCODING_8859_1:
1269 	    return("ISO-8859-1");
1270         case XML_CHAR_ENCODING_8859_2:
1271 	    return("ISO-8859-2");
1272         case XML_CHAR_ENCODING_8859_3:
1273 	    return("ISO-8859-3");
1274         case XML_CHAR_ENCODING_8859_4:
1275 	    return("ISO-8859-4");
1276         case XML_CHAR_ENCODING_8859_5:
1277 	    return("ISO-8859-5");
1278         case XML_CHAR_ENCODING_8859_6:
1279 	    return("ISO-8859-6");
1280         case XML_CHAR_ENCODING_8859_7:
1281 	    return("ISO-8859-7");
1282         case XML_CHAR_ENCODING_8859_8:
1283 	    return("ISO-8859-8");
1284         case XML_CHAR_ENCODING_8859_9:
1285 	    return("ISO-8859-9");
1286         case XML_CHAR_ENCODING_2022_JP:
1287             return("ISO-2022-JP");
1288         case XML_CHAR_ENCODING_SHIFT_JIS:
1289             return("Shift-JIS");
1290         case XML_CHAR_ENCODING_EUC_JP:
1291             return("EUC-JP");
1292 	case XML_CHAR_ENCODING_ASCII:
1293 	    return(NULL);
1294     }
1295     return(NULL);
1296 }
1297 
1298 /************************************************************************
1299  *									*
1300  *			Char encoding handlers				*
1301  *									*
1302  ************************************************************************/
1303 
1304 
/* The handler table has a fixed capacity; it is not grown on demand. */
1306 #define MAX_ENCODING_HANDLERS 50
1307 static xmlCharEncodingHandlerPtr *handlers = NULL;
1308 static int nbCharEncodingHandler = 0;
1309 
/*
 * UTF-8 is the default encoding for XML and is also the encoding used by
 * the parser internals, so the default encoding handler is NULL.
 */
1314 
1315 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1316 
1317 /**
1318  * xmlNewCharEncodingHandler:
1319  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1320  * @input:  the xmlCharEncodingInputFunc to read that encoding
1321  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1322  *
1323  * Create and registers an xmlCharEncodingHandler.
1324  *
1325  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1326  */
1327 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1328 xmlNewCharEncodingHandler(const char *name,
1329                           xmlCharEncodingInputFunc input,
1330                           xmlCharEncodingOutputFunc output) {
1331     xmlCharEncodingHandlerPtr handler;
1332     const char *alias;
1333     char upper[500];
1334     int i;
1335     char *up = NULL;
1336 
1337     /*
1338      * Do the alias resolution
1339      */
1340     alias = xmlGetEncodingAlias(name);
1341     if (alias != NULL)
1342 	name = alias;
1343 
1344     /*
1345      * Keep only the uppercase version of the encoding.
1346      */
1347     if (name == NULL) {
1348         xmlEncodingErr(XML_I18N_NO_NAME,
1349 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1350 	return(NULL);
1351     }
1352     for (i = 0;i < 499;i++) {
1353         upper[i] = toupper(name[i]);
1354 	if (upper[i] == 0) break;
1355     }
1356     upper[i] = 0;
1357     up = xmlMemStrdup(upper);
1358     if (up == NULL) {
1359         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1360 	return(NULL);
1361     }
1362 
1363     /*
1364      * allocate and fill-up an handler block.
1365      */
1366     handler = (xmlCharEncodingHandlerPtr)
1367               xmlMalloc(sizeof(xmlCharEncodingHandler));
1368     if (handler == NULL) {
1369         xmlFree(up);
1370         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1371 	return(NULL);
1372     }
1373     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1374     handler->input = input;
1375     handler->output = output;
1376     handler->name = up;
1377 
1378 #ifdef LIBXML_ICONV_ENABLED
1379     handler->iconv_in = NULL;
1380     handler->iconv_out = NULL;
1381 #endif
1382 #ifdef LIBXML_ICU_ENABLED
1383     handler->uconv_in = NULL;
1384     handler->uconv_out = NULL;
1385 #endif
1386 
1387     /*
1388      * registers and returns the handler.
1389      */
1390     xmlRegisterCharEncodingHandler(handler);
1391 #ifdef DEBUG_ENCODING
1392     xmlGenericError(xmlGenericErrorContext,
1393 	    "Registered encoding handler for %s\n", name);
1394 #endif
1395     return(handler);
1396 }
1397 
1398 /**
1399  * xmlInitCharEncodingHandlers:
1400  *
1401  * Initialize the char encoding support, it registers the default
1402  * encoding supported.
1403  * NOTE: while public, this function usually doesn't need to be called
1404  *       in normal processing.
1405  */
1406 void
xmlInitCharEncodingHandlers(void)1407 xmlInitCharEncodingHandlers(void) {
1408     unsigned short int tst = 0x1234;
1409     unsigned char *ptr = (unsigned char *) &tst;
1410 
1411     if (handlers != NULL) return;
1412 
1413     handlers = (xmlCharEncodingHandlerPtr *)
1414         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1415 
1416     if (*ptr == 0x12) xmlLittleEndian = 0;
1417     else if (*ptr == 0x34) xmlLittleEndian = 1;
1418     else {
1419         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1420 	               "Odd problem at endianness detection\n", NULL);
1421     }
1422 
1423     if (handlers == NULL) {
1424         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1425 	return;
1426     }
1427     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1428 #ifdef LIBXML_OUTPUT_ENABLED
1429     xmlUTF16LEHandler =
1430           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1431     xmlUTF16BEHandler =
1432           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1433     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1434     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1435     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1436     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1437 #ifdef LIBXML_HTML_ENABLED
1438     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1439 #endif
1440 #else
1441     xmlUTF16LEHandler =
1442           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1443     xmlUTF16BEHandler =
1444           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1445     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1446     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1447     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1448     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1449 #endif /* LIBXML_OUTPUT_ENABLED */
1450 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1451 #ifdef LIBXML_ISO8859X_ENABLED
1452     xmlRegisterCharEncodingHandlersISO8859x ();
1453 #endif
1454 #endif
1455 
1456 }
1457 
1458 /**
1459  * xmlCleanupCharEncodingHandlers:
1460  *
1461  * Cleanup the memory allocated for the char encoding support, it
1462  * unregisters all the encoding handlers and the aliases.
1463  */
1464 void
xmlCleanupCharEncodingHandlers(void)1465 xmlCleanupCharEncodingHandlers(void) {
1466     xmlCleanupEncodingAliases();
1467 
1468     if (handlers == NULL) return;
1469 
1470     for (;nbCharEncodingHandler > 0;) {
1471         nbCharEncodingHandler--;
1472 	if (handlers[nbCharEncodingHandler] != NULL) {
1473 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1474 		xmlFree(handlers[nbCharEncodingHandler]->name);
1475 	    xmlFree(handlers[nbCharEncodingHandler]);
1476 	}
1477     }
1478     xmlFree(handlers);
1479     handlers = NULL;
1480     nbCharEncodingHandler = 0;
1481     xmlDefaultCharEncodingHandler = NULL;
1482 }
1483 
1484 /**
1485  * xmlRegisterCharEncodingHandler:
1486  * @handler:  the xmlCharEncodingHandlerPtr handler block
1487  *
1488  * Register the char encoding handler, surprising, isn't it ?
1489  */
1490 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1491 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1492     if (handlers == NULL) xmlInitCharEncodingHandlers();
1493     if ((handler == NULL) || (handlers == NULL)) {
1494         xmlEncodingErr(XML_I18N_NO_HANDLER,
1495 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1496         goto free_handler;
1497     }
1498 
1499     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1500         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1501 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1502 	               "MAX_ENCODING_HANDLERS");
1503         goto free_handler;
1504     }
1505     handlers[nbCharEncodingHandler++] = handler;
1506     return;
1507 
1508 free_handler:
1509     if (handler != NULL) {
1510         if (handler->name != NULL) {
1511             xmlFree(handler->name);
1512         }
1513         xmlFree(handler);
1514     }
1515 }
1516 
1517 /**
1518  * xmlGetCharEncodingHandler:
1519  * @enc:  an xmlCharEncoding value.
1520  *
1521  * Search in the registered set the handler able to read/write that encoding.
1522  *
1523  * Returns the handler or NULL if not found
1524  */
1525 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1526 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1527     xmlCharEncodingHandlerPtr handler;
1528 
1529     if (handlers == NULL) xmlInitCharEncodingHandlers();
1530     switch (enc) {
1531         case XML_CHAR_ENCODING_ERROR:
1532 	    return(NULL);
1533         case XML_CHAR_ENCODING_NONE:
1534 	    return(NULL);
1535         case XML_CHAR_ENCODING_UTF8:
1536 	    return(NULL);
1537         case XML_CHAR_ENCODING_UTF16LE:
1538 	    return(xmlUTF16LEHandler);
1539         case XML_CHAR_ENCODING_UTF16BE:
1540 	    return(xmlUTF16BEHandler);
1541         case XML_CHAR_ENCODING_EBCDIC:
1542             handler = xmlFindCharEncodingHandler("EBCDIC");
1543             if (handler != NULL) return(handler);
1544             handler = xmlFindCharEncodingHandler("ebcdic");
1545             if (handler != NULL) return(handler);
1546             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1547             if (handler != NULL) return(handler);
1548             handler = xmlFindCharEncodingHandler("IBM-037");
1549             if (handler != NULL) return(handler);
1550 	    break;
1551         case XML_CHAR_ENCODING_UCS4BE:
1552             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1553             if (handler != NULL) return(handler);
1554             handler = xmlFindCharEncodingHandler("UCS-4");
1555             if (handler != NULL) return(handler);
1556             handler = xmlFindCharEncodingHandler("UCS4");
1557             if (handler != NULL) return(handler);
1558 	    break;
1559         case XML_CHAR_ENCODING_UCS4LE:
1560             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1561             if (handler != NULL) return(handler);
1562             handler = xmlFindCharEncodingHandler("UCS-4");
1563             if (handler != NULL) return(handler);
1564             handler = xmlFindCharEncodingHandler("UCS4");
1565             if (handler != NULL) return(handler);
1566 	    break;
1567         case XML_CHAR_ENCODING_UCS4_2143:
1568 	    break;
1569         case XML_CHAR_ENCODING_UCS4_3412:
1570 	    break;
1571         case XML_CHAR_ENCODING_UCS2:
1572             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1573             if (handler != NULL) return(handler);
1574             handler = xmlFindCharEncodingHandler("UCS-2");
1575             if (handler != NULL) return(handler);
1576             handler = xmlFindCharEncodingHandler("UCS2");
1577             if (handler != NULL) return(handler);
1578 	    break;
1579 
1580 	    /*
1581 	     * We used to keep ISO Latin encodings native in the
1582 	     * generated data. This led to so many problems that
1583 	     * this has been removed. One can still change this
1584 	     * back by registering no-ops encoders for those
1585 	     */
1586         case XML_CHAR_ENCODING_8859_1:
1587 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1588 	    if (handler != NULL) return(handler);
1589 	    break;
1590         case XML_CHAR_ENCODING_8859_2:
1591 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1592 	    if (handler != NULL) return(handler);
1593 	    break;
1594         case XML_CHAR_ENCODING_8859_3:
1595 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1596 	    if (handler != NULL) return(handler);
1597 	    break;
1598         case XML_CHAR_ENCODING_8859_4:
1599 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1600 	    if (handler != NULL) return(handler);
1601 	    break;
1602         case XML_CHAR_ENCODING_8859_5:
1603 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1604 	    if (handler != NULL) return(handler);
1605 	    break;
1606         case XML_CHAR_ENCODING_8859_6:
1607 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1608 	    if (handler != NULL) return(handler);
1609 	    break;
1610         case XML_CHAR_ENCODING_8859_7:
1611 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1612 	    if (handler != NULL) return(handler);
1613 	    break;
1614         case XML_CHAR_ENCODING_8859_8:
1615 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1616 	    if (handler != NULL) return(handler);
1617 	    break;
1618         case XML_CHAR_ENCODING_8859_9:
1619 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1620 	    if (handler != NULL) return(handler);
1621 	    break;
1622 
1623 
1624         case XML_CHAR_ENCODING_2022_JP:
1625             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1626             if (handler != NULL) return(handler);
1627 	    break;
1628         case XML_CHAR_ENCODING_SHIFT_JIS:
1629             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1630             if (handler != NULL) return(handler);
1631             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1632             if (handler != NULL) return(handler);
1633             handler = xmlFindCharEncodingHandler("Shift_JIS");
1634             if (handler != NULL) return(handler);
1635 	    break;
1636         case XML_CHAR_ENCODING_EUC_JP:
1637             handler = xmlFindCharEncodingHandler("EUC-JP");
1638             if (handler != NULL) return(handler);
1639 	    break;
1640 	default:
1641 	    break;
1642     }
1643 
1644 #ifdef DEBUG_ENCODING
1645     xmlGenericError(xmlGenericErrorContext,
1646 	    "No handler found for encoding %d\n", enc);
1647 #endif
1648     return(NULL);
1649 }
1650 
1651 /**
1652  * xmlFindCharEncodingHandler:
1653  * @name:  a string describing the char encoding.
1654  *
1655  * Search in the registered set the handler able to read/write that encoding.
1656  *
1657  * Returns the handler or NULL if not found
1658  */
1659 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1660 xmlFindCharEncodingHandler(const char *name) {
1661     const char *nalias;
1662     const char *norig;
1663     xmlCharEncoding alias;
1664 #ifdef LIBXML_ICONV_ENABLED
1665     xmlCharEncodingHandlerPtr enc;
1666     iconv_t icv_in, icv_out;
1667 #endif /* LIBXML_ICONV_ENABLED */
1668 #ifdef LIBXML_ICU_ENABLED
1669     xmlCharEncodingHandlerPtr encu;
1670     uconv_t *ucv_in, *ucv_out;
1671 #endif /* LIBXML_ICU_ENABLED */
1672     char upper[100];
1673     int i;
1674 
1675     if (handlers == NULL) xmlInitCharEncodingHandlers();
1676     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1677     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1678 
1679     /*
1680      * Do the alias resolution
1681      */
1682     norig = name;
1683     nalias = xmlGetEncodingAlias(name);
1684     if (nalias != NULL)
1685 	name = nalias;
1686 
1687     /*
1688      * Check first for directly registered encoding names
1689      */
1690     for (i = 0;i < 99;i++) {
1691         upper[i] = toupper(name[i]);
1692 	if (upper[i] == 0) break;
1693     }
1694     upper[i] = 0;
1695 
1696     if (handlers != NULL) {
1697         for (i = 0;i < nbCharEncodingHandler; i++) {
1698             if (!strcmp(upper, handlers[i]->name)) {
1699 #ifdef DEBUG_ENCODING
1700                 xmlGenericError(xmlGenericErrorContext,
1701                         "Found registered handler for encoding %s\n", name);
1702 #endif
1703                 return(handlers[i]);
1704             }
1705         }
1706     }
1707 
1708 #ifdef LIBXML_ICONV_ENABLED
1709     /* check whether iconv can handle this */
1710     icv_in = iconv_open("UTF-8", name);
1711     icv_out = iconv_open(name, "UTF-8");
1712     if (icv_in == (iconv_t) -1) {
1713         icv_in = iconv_open("UTF-8", upper);
1714     }
1715     if (icv_out == (iconv_t) -1) {
1716 	icv_out = iconv_open(upper, "UTF-8");
1717     }
1718     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1719 	    enc = (xmlCharEncodingHandlerPtr)
1720 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1721 	    if (enc == NULL) {
1722 	        iconv_close(icv_in);
1723 	        iconv_close(icv_out);
1724 		return(NULL);
1725 	    }
1726             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1727 	    enc->name = xmlMemStrdup(name);
1728 	    enc->input = NULL;
1729 	    enc->output = NULL;
1730 	    enc->iconv_in = icv_in;
1731 	    enc->iconv_out = icv_out;
1732 #ifdef DEBUG_ENCODING
1733             xmlGenericError(xmlGenericErrorContext,
1734 		    "Found iconv handler for encoding %s\n", name);
1735 #endif
1736 	    return enc;
1737     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1738 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1739 		    "iconv : problems with filters for '%s'\n", name);
1740     }
1741 #endif /* LIBXML_ICONV_ENABLED */
1742 #ifdef LIBXML_ICU_ENABLED
1743     /* check whether icu can handle this */
1744     ucv_in = openIcuConverter(name, 1);
1745     ucv_out = openIcuConverter(name, 0);
1746     if (ucv_in != NULL && ucv_out != NULL) {
1747 	    encu = (xmlCharEncodingHandlerPtr)
1748 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1749 	    if (encu == NULL) {
1750                 closeIcuConverter(ucv_in);
1751                 closeIcuConverter(ucv_out);
1752 		return(NULL);
1753 	    }
1754             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1755 	    encu->name = xmlMemStrdup(name);
1756 	    encu->input = NULL;
1757 	    encu->output = NULL;
1758 	    encu->uconv_in = ucv_in;
1759 	    encu->uconv_out = ucv_out;
1760 #ifdef DEBUG_ENCODING
1761             xmlGenericError(xmlGenericErrorContext,
1762 		    "Found ICU converter handler for encoding %s\n", name);
1763 #endif
1764 	    return encu;
1765     } else if (ucv_in != NULL || ucv_out != NULL) {
1766             closeIcuConverter(ucv_in);
1767             closeIcuConverter(ucv_out);
1768 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1769 		    "ICU converter : problems with filters for '%s'\n", name);
1770     }
1771 #endif /* LIBXML_ICU_ENABLED */
1772 
1773 #ifdef DEBUG_ENCODING
1774     xmlGenericError(xmlGenericErrorContext,
1775 	    "No handler found for encoding %s\n", name);
1776 #endif
1777 
1778     /*
1779      * Fallback using the canonical names
1780      */
1781     alias = xmlParseCharEncoding(norig);
1782     if (alias != XML_CHAR_ENCODING_ERROR) {
1783         const char* canon;
1784         canon = xmlGetCharEncodingName(alias);
1785         if ((canon != NULL) && (strcmp(name, canon))) {
1786 	    return(xmlFindCharEncodingHandler(canon));
1787         }
1788     }
1789 
1790     /* If "none of the above", give up */
1791     return(NULL);
1792 }
1793 
1794 /************************************************************************
1795  *									*
1796  *		ICONV based generic conversion functions		*
1797  *									*
1798  ************************************************************************/
1799 
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion from @in to @out and maps its outcome to
 * the return codes used by the encoding functions of this file.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (E2BIG) or if one of the pointers is NULL, or
 *     -2 if the transcoding fails (EILSEQ: *in is not a valid string or
 *        the result of transformation can't fit into the encoding we
 *        want), or
 *     -3 in the other cases, e.g. when the last bytes can't form a single
 *        output char (EINVAL).
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced
 * (0 when a NULL pointer was passed and @outlen itself is not NULL).
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    int rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);
    /* iconv() leaves the unused byte counts behind: turn them into used */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL and any other error are reported the same way */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1859 
1860 /************************************************************************
1861  *									*
1862  *		ICU based generic conversion functions		*
1863  *									*
1864  ************************************************************************/
1865 
1866 #ifdef LIBXML_ICU_ENABLED
1867 /**
1868  * xmlUconvWrapper:
1869  * @cd: ICU uconverter data structure
1870  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1871  * @out:  a pointer to an array of bytes to store the result
1872  * @outlen:  the length of @out
1873  * @in:  a pointer to an array of input bytes
1874  * @inlen:  the length of @in
1875  * @flush: if true, indicates end of input
1876  *
1877  * Returns 0 if success, or
1878  *     -1 by lack of space, or
1879  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1880  *        the result of transformation can't fit into the encoding we want), or
1881  *     -3 if there the last byte can't form a single output char.
1882  *
1883  * The value of @inlen after return is the number of octets consumed
1884  *     as the return value is positive, else unpredictable.
1885  * The value of @outlen after return is the number of octets produced.
1886  */
1887 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1888 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1889                 const unsigned char *in, int *inlen, int flush) {
1890     const char *ucv_in = (const char *) in;
1891     char *ucv_out = (char *) out;
1892     UErrorCode err = U_ZERO_ERROR;
1893 
1894     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1895         if (outlen != NULL) *outlen = 0;
1896         return(-1);
1897     }
1898 
1899     if (toUnicode) {
1900         /* encoding => UTF-16 => UTF-8 */
1901         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1902                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1903                        &cd->pivot_source, &cd->pivot_target,
1904                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1905     } else {
1906         /* UTF-8 => UTF-16 => encoding */
1907         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1908                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1909                        &cd->pivot_source, &cd->pivot_target,
1910                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1911     }
1912     *inlen = ucv_in - (const char*) in;
1913     *outlen = ucv_out - (char *) out;
1914     if (U_SUCCESS(err)) {
1915         /* reset pivot buf if this is the last call for input (flush==TRUE) */
1916         if (flush)
1917             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1918         return 0;
1919     }
1920     if (err == U_BUFFER_OVERFLOW_ERROR)
1921         return -1;
1922     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1923         return -2;
1924     return -3;
1925 }
1926 #endif /* LIBXML_ICU_ENABLED */
1927 
1928 /************************************************************************
1929  *									*
1930  *		The real API used by libxml for on-the-fly conversion	*
1931  *									*
1932  ************************************************************************/
1933 
1934 /**
1935  * xmlEncInputChunk:
1936  * @handler:  encoding handler
1937  * @out:  a pointer to an array of bytes to store the result
1938  * @outlen:  the length of @out
1939  * @in:  a pointer to an array of input bytes
1940  * @inlen:  the length of @in
1941  * @flush:  flush (ICU-related)
1942  *
1943  * Returns 0 if success, or
1944  *     -1 by lack of space, or
1945  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1946  *        the result of transformation can't fit into the encoding we want), or
1947  *     -3 if there the last byte can't form a single output char.
1948  *
1949  * The value of @inlen after return is the number of octets consumed
1950  *     as the return value is 0, else unpredictable.
1951  * The value of @outlen after return is the number of octets produced.
1952  */
1953 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1954 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1955                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1956     int ret;
1957     (void)flush;
1958 
1959     if (handler->input != NULL) {
1960         ret = handler->input(out, outlen, in, inlen);
1961         if (ret > 0)
1962            ret = 0;
1963     }
1964 #ifdef LIBXML_ICONV_ENABLED
1965     else if (handler->iconv_in != NULL) {
1966         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1967     }
1968 #endif /* LIBXML_ICONV_ENABLED */
1969 #ifdef LIBXML_ICU_ENABLED
1970     else if (handler->uconv_in != NULL) {
1971         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1972                               flush);
1973     }
1974 #endif /* LIBXML_ICU_ENABLED */
1975     else {
1976         *outlen = 0;
1977         *inlen = 0;
1978         ret = -2;
1979     }
1980 
1981     return(ret);
1982 }
1983 
1984 /**
1985  * xmlEncOutputChunk:
1986  * @handler:  encoding handler
1987  * @out:  a pointer to an array of bytes to store the result
1988  * @outlen:  the length of @out
1989  * @in:  a pointer to an array of input bytes
1990  * @inlen:  the length of @in
1991  *
1992  * Returns 0 if success, or
1993  *     -1 by lack of space, or
1994  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1995  *        the result of transformation can't fit into the encoding we want), or
1996  *     -3 if there the last byte can't form a single output char.
1997  *     -4 if no output function was found.
1998  *
1999  * The value of @inlen after return is the number of octets consumed
2000  *     as the return value is 0, else unpredictable.
2001  * The value of @outlen after return is the number of octets produced.
2002  */
2003 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2004 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2005                   int *outlen, const unsigned char *in, int *inlen) {
2006     int ret;
2007 
2008     if (handler->output != NULL) {
2009         ret = handler->output(out, outlen, in, inlen);
2010         if (ret > 0)
2011            ret = 0;
2012     }
2013 #ifdef LIBXML_ICONV_ENABLED
2014     else if (handler->iconv_out != NULL) {
2015         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2016     }
2017 #endif /* LIBXML_ICONV_ENABLED */
2018 #ifdef LIBXML_ICU_ENABLED
2019     else if (handler->uconv_out != NULL) {
2020         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2021                               1);
2022     }
2023 #endif /* LIBXML_ICU_ENABLED */
2024     else {
2025         *outlen = 0;
2026         *inlen = 0;
2027         ret = -4;
2028     }
2029 
2030     return(ret);
2031 }
2032 
2033 /**
2034  * xmlCharEncFirstLineInt:
2035  * @handler:	char encoding transformation data structure
2036  * @out:  an xmlBuffer for the output.
2037  * @in:  an xmlBuffer for the input
2038  * @len:  number of bytes to convert for the first line, or -1
2039  *
2040  * Front-end for the encoding handler input function, but handle only
2041  * the very first line, i.e. limit itself to 45 chars.
2042  *
2043  * Returns the number of byte written if success, or
2044  *     -1 general error
2045  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2046  *        the result of transformation can't fit into the encoding we want), or
2047  */
2048 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2049 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2050                        xmlBufferPtr in, int len) {
2051     int ret;
2052     int written;
2053     int toconv;
2054 
2055     if (handler == NULL) return(-1);
2056     if (out == NULL) return(-1);
2057     if (in == NULL) return(-1);
2058 
2059     /* calculate space available */
2060     written = out->size - out->use - 1; /* count '\0' */
2061     toconv = in->use;
2062     /*
2063      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2064      * 45 chars should be sufficient to reach the end of the encoding
2065      * declaration without going too far inside the document content.
2066      * on UTF-16 this means 90bytes, on UCS4 this means 180
2067      * The actual value depending on guessed encoding is passed as @len
2068      * if provided
2069      */
2070     if (len >= 0) {
2071         if (toconv > len)
2072             toconv = len;
2073     } else {
2074         if (toconv > 180)
2075             toconv = 180;
2076     }
2077     if (toconv * 2 >= written) {
2078         xmlBufferGrow(out, toconv * 2);
2079 	written = out->size - out->use - 1;
2080     }
2081 
2082     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2083                            in->content, &toconv, 0);
2084     xmlBufferShrink(in, toconv);
2085     out->use += written;
2086     out->content[out->use] = 0;
2087     if (ret == -1) ret = -3;
2088 
2089 #ifdef DEBUG_ENCODING
2090     switch (ret) {
2091         case 0:
2092 	    xmlGenericError(xmlGenericErrorContext,
2093 		    "converted %d bytes to %d bytes of input\n",
2094 	            toconv, written);
2095 	    break;
2096         case -1:
2097 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2098 	            toconv, written, in->use);
2099 	    break;
2100         case -2:
2101 	    xmlGenericError(xmlGenericErrorContext,
2102 		    "input conversion failed due to input error\n");
2103 	    break;
2104         case -3:
2105 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2106 	            toconv, written, in->use);
2107 	    break;
2108 	default:
2109 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2110     }
2111 #endif /* DEBUG_ENCODING */
2112     /*
2113      * Ignore when input buffer is not on a boundary
2114      */
2115     if (ret == -3) ret = 0;
2116     if (ret == -1) ret = 0;
2117     return(written ? written : ret);
2118 }
2119 
2120 /**
2121  * xmlCharEncFirstLine:
2122  * @handler:	char encoding transformation data structure
2123  * @out:  an xmlBuffer for the output.
2124  * @in:  an xmlBuffer for the input
2125  *
2126  * Front-end for the encoding handler input function, but handle only
2127  * the very first line, i.e. limit itself to 45 chars.
2128  *
2129  * Returns the number of byte written if success, or
2130  *     -1 general error
2131  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2132  *        the result of transformation can't fit into the encoding we want), or
2133  */
2134 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2135 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2136                  xmlBufferPtr in) {
2137     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2138 }
2139 
2140 /**
2141  * xmlCharEncFirstLineInput:
2142  * @input: a parser input buffer
2143  * @len:  number of bytes to convert for the first line, or -1
2144  *
2145  * Front-end for the encoding handler input function, but handle only
2146  * the very first line. Point is that this is based on autodetection
2147  * of the encoding and once that first line is converted we may find
2148  * out that a different decoder is needed to process the input.
2149  *
2150  * Returns the number of byte written if success, or
2151  *     -1 general error
2152  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2153  *        the result of transformation can't fit into the encoding we want), or
2154  */
2155 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2156 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2157 {
2158     int ret;
2159     size_t written;
2160     size_t toconv;
2161     int c_in;
2162     int c_out;
2163     xmlBufPtr in;
2164     xmlBufPtr out;
2165 
2166     if ((input == NULL) || (input->encoder == NULL) ||
2167         (input->buffer == NULL) || (input->raw == NULL))
2168         return (-1);
2169     out = input->buffer;
2170     in = input->raw;
2171 
2172     toconv = xmlBufUse(in);
2173     if (toconv == 0)
2174         return (0);
2175     written = xmlBufAvail(out) - 1; /* count '\0' */
2176     /*
2177      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2178      * 45 chars should be sufficient to reach the end of the encoding
2179      * declaration without going too far inside the document content.
2180      * on UTF-16 this means 90bytes, on UCS4 this means 180
2181      * The actual value depending on guessed encoding is passed as @len
2182      * if provided
2183      */
2184     if (len >= 0) {
2185         if (toconv > (unsigned int) len)
2186             toconv = len;
2187     } else {
2188         if (toconv > 180)
2189             toconv = 180;
2190     }
2191     if (toconv * 2 >= written) {
2192         xmlBufGrow(out, toconv * 2);
2193         written = xmlBufAvail(out) - 1;
2194     }
2195     if (written > 360)
2196         written = 360;
2197 
2198     c_in = toconv;
2199     c_out = written;
2200     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2201                            xmlBufContent(in), &c_in, 0);
2202     xmlBufShrink(in, c_in);
2203     xmlBufAddLen(out, c_out);
2204     if (ret == -1)
2205         ret = -3;
2206 
2207     switch (ret) {
2208         case 0:
2209 #ifdef DEBUG_ENCODING
2210             xmlGenericError(xmlGenericErrorContext,
2211                             "converted %d bytes to %d bytes of input\n",
2212                             c_in, c_out);
2213 #endif
2214             break;
2215         case -1:
2216 #ifdef DEBUG_ENCODING
2217             xmlGenericError(xmlGenericErrorContext,
2218                          "converted %d bytes to %d bytes of input, %d left\n",
2219                             c_in, c_out, (int)xmlBufUse(in));
2220 #endif
2221             break;
2222         case -3:
2223 #ifdef DEBUG_ENCODING
2224             xmlGenericError(xmlGenericErrorContext,
2225                         "converted %d bytes to %d bytes of input, %d left\n",
2226                             c_in, c_out, (int)xmlBufUse(in));
2227 #endif
2228             break;
2229         case -2: {
2230             char buf[50];
2231             const xmlChar *content = xmlBufContent(in);
2232 
2233 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2234 		     content[0], content[1],
2235 		     content[2], content[3]);
2236 	    buf[49] = 0;
2237 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2238 		    "input conversion failed due to input error, bytes %s\n",
2239 		           buf);
2240         }
2241     }
2242     /*
2243      * Ignore when input buffer is not on a boundary
2244      */
2245     if (ret == -3) ret = 0;
2246     if (ret == -1) ret = 0;
2247     return(c_out ? c_out : ret);
2248 }
2249 
2250 /**
2251  * xmlCharEncInput:
2252  * @input: a parser input buffer
2253  * @flush: try to flush all the raw buffer
2254  *
2255  * Generic front-end for the encoding handler on parser input
2256  *
2257  * Returns the number of byte written if success, or
2258  *     -1 general error
2259  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2260  *        the result of transformation can't fit into the encoding we want), or
2261  */
2262 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2263 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2264 {
2265     int ret;
2266     size_t written;
2267     size_t toconv;
2268     int c_in;
2269     int c_out;
2270     xmlBufPtr in;
2271     xmlBufPtr out;
2272 
2273     if ((input == NULL) || (input->encoder == NULL) ||
2274         (input->buffer == NULL) || (input->raw == NULL))
2275         return (-1);
2276     out = input->buffer;
2277     in = input->raw;
2278 
2279     toconv = xmlBufUse(in);
2280     if (toconv == 0)
2281         return (0);
2282     if ((toconv > 64 * 1024) && (flush == 0))
2283         toconv = 64 * 1024;
2284     written = xmlBufAvail(out);
2285     if (written > 0)
2286         written--; /* count '\0' */
2287     if (toconv * 2 >= written) {
2288         xmlBufGrow(out, toconv * 2);
2289         written = xmlBufAvail(out);
2290         if (written > 0)
2291             written--; /* count '\0' */
2292     }
2293     if ((written > 128 * 1024) && (flush == 0))
2294         written = 128 * 1024;
2295 
2296     c_in = toconv;
2297     c_out = written;
2298     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2299                            xmlBufContent(in), &c_in, flush);
2300     xmlBufShrink(in, c_in);
2301     xmlBufAddLen(out, c_out);
2302     if (ret == -1)
2303         ret = -3;
2304 
2305     switch (ret) {
2306         case 0:
2307 #ifdef DEBUG_ENCODING
2308             xmlGenericError(xmlGenericErrorContext,
2309                             "converted %d bytes to %d bytes of input\n",
2310                             c_in, c_out);
2311 #endif
2312             break;
2313         case -1:
2314 #ifdef DEBUG_ENCODING
2315             xmlGenericError(xmlGenericErrorContext,
2316                          "converted %d bytes to %d bytes of input, %d left\n",
2317                             c_in, c_out, (int)xmlBufUse(in));
2318 #endif
2319             break;
2320         case -3:
2321 #ifdef DEBUG_ENCODING
2322             xmlGenericError(xmlGenericErrorContext,
2323                         "converted %d bytes to %d bytes of input, %d left\n",
2324                             c_in, c_out, (int)xmlBufUse(in));
2325 #endif
2326             break;
2327         case -2: {
2328             char buf[50];
2329             const xmlChar *content = xmlBufContent(in);
2330 
2331 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2332 		     content[0], content[1],
2333 		     content[2], content[3]);
2334 	    buf[49] = 0;
2335 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2336 		    "input conversion failed due to input error, bytes %s\n",
2337 		           buf);
2338         }
2339     }
2340     /*
2341      * Ignore when input buffer is not on a boundary
2342      */
2343     if (ret == -3)
2344         ret = 0;
2345     return (c_out? c_out : ret);
2346 }
2347 
2348 /**
2349  * xmlCharEncInFunc:
2350  * @handler:	char encoding transformation data structure
2351  * @out:  an xmlBuffer for the output.
2352  * @in:  an xmlBuffer for the input
2353  *
2354  * Generic front-end for the encoding handler input function
2355  *
2356  * Returns the number of byte written if success, or
2357  *     -1 general error
2358  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2359  *        the result of transformation can't fit into the encoding we want), or
2360  */
2361 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2362 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2363                  xmlBufferPtr in)
2364 {
2365     int ret;
2366     int written;
2367     int toconv;
2368 
2369     if (handler == NULL)
2370         return (-1);
2371     if (out == NULL)
2372         return (-1);
2373     if (in == NULL)
2374         return (-1);
2375 
2376     toconv = in->use;
2377     if (toconv == 0)
2378         return (0);
2379     written = out->size - out->use -1; /* count '\0' */
2380     if (toconv * 2 >= written) {
2381         xmlBufferGrow(out, out->size + toconv * 2);
2382         written = out->size - out->use - 1;
2383     }
2384     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2385                            in->content, &toconv, 1);
2386     xmlBufferShrink(in, toconv);
2387     out->use += written;
2388     out->content[out->use] = 0;
2389     if (ret == -1)
2390         ret = -3;
2391 
2392     switch (ret) {
2393         case 0:
2394 #ifdef DEBUG_ENCODING
2395             xmlGenericError(xmlGenericErrorContext,
2396                             "converted %d bytes to %d bytes of input\n",
2397                             toconv, written);
2398 #endif
2399             break;
2400         case -1:
2401 #ifdef DEBUG_ENCODING
2402             xmlGenericError(xmlGenericErrorContext,
2403                          "converted %d bytes to %d bytes of input, %d left\n",
2404                             toconv, written, in->use);
2405 #endif
2406             break;
2407         case -3:
2408 #ifdef DEBUG_ENCODING
2409             xmlGenericError(xmlGenericErrorContext,
2410                         "converted %d bytes to %d bytes of input, %d left\n",
2411                             toconv, written, in->use);
2412 #endif
2413             break;
2414         case -2: {
2415             char buf[50];
2416 
2417 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2418 		     in->content[0], in->content[1],
2419 		     in->content[2], in->content[3]);
2420 	    buf[49] = 0;
2421 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2422 		    "input conversion failed due to input error, bytes %s\n",
2423 		           buf);
2424         }
2425     }
2426     /*
2427      * Ignore when input buffer is not on a boundary
2428      */
2429     if (ret == -3)
2430         ret = 0;
2431     return (written? written : ret);
2432 }
2433 
2434 #ifdef LIBXML_OUTPUT_ENABLED
2435 /**
2436  * xmlCharEncOutput:
2437  * @output: a parser output buffer
2438  * @init: is this an initialization call without data
2439  *
2440  * Generic front-end for the encoding handler on parser output
2441  * a first call with @init == 1 has to be made first to initiate the
2442  * output in case of non-stateless encoding needing to initiate their
2443  * state or the output (like the BOM in UTF16).
2444  * In case of UTF8 sequence conversion errors for the given encoder,
2445  * the content will be automatically remapped to a CharRef sequence.
2446  *
2447  * Returns the number of byte written if success, or
2448  *     -1 general error
2449  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2450  *        the result of transformation can't fit into the encoding we want), or
2451  */
2452 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2453 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2454 {
2455     int ret;
2456     size_t written;
2457     int writtentot = 0;
2458     size_t toconv;
2459     int c_in;
2460     int c_out;
2461     xmlBufPtr in;
2462     xmlBufPtr out;
2463 
2464     if ((output == NULL) || (output->encoder == NULL) ||
2465         (output->buffer == NULL) || (output->conv == NULL))
2466         return (-1);
2467     out = output->conv;
2468     in = output->buffer;
2469 
2470 retry:
2471 
2472     written = xmlBufAvail(out);
2473     if (written > 0)
2474         written--; /* count '\0' */
2475 
2476     /*
2477      * First specific handling of the initialization call
2478      */
2479     if (init) {
2480         c_in = 0;
2481         c_out = written;
2482         /* TODO: Check return value. */
2483         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2484                           NULL, &c_in);
2485         xmlBufAddLen(out, c_out);
2486 #ifdef DEBUG_ENCODING
2487 	xmlGenericError(xmlGenericErrorContext,
2488 		"initialized encoder\n");
2489 #endif
2490         return(c_out);
2491     }
2492 
2493     /*
2494      * Conversion itself.
2495      */
2496     toconv = xmlBufUse(in);
2497     if (toconv == 0)
2498         return (0);
2499     if (toconv > 64 * 1024)
2500         toconv = 64 * 1024;
2501     if (toconv * 4 >= written) {
2502         xmlBufGrow(out, toconv * 4);
2503         written = xmlBufAvail(out) - 1;
2504     }
2505     if (written > 256 * 1024)
2506         written = 256 * 1024;
2507 
2508     c_in = toconv;
2509     c_out = written;
2510     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2511                             xmlBufContent(in), &c_in);
2512     xmlBufShrink(in, c_in);
2513     xmlBufAddLen(out, c_out);
2514     writtentot += c_out;
2515     if (ret == -1) {
2516         if (c_out > 0) {
2517             /* Can be a limitation of iconv or uconv */
2518             goto retry;
2519         }
2520         ret = -3;
2521     }
2522 
2523     /*
2524      * Attempt to handle error cases
2525      */
2526     switch (ret) {
2527         case 0:
2528 #ifdef DEBUG_ENCODING
2529 	    xmlGenericError(xmlGenericErrorContext,
2530 		    "converted %d bytes to %d bytes of output\n",
2531 	            c_in, c_out);
2532 #endif
2533 	    break;
2534         case -1:
2535 #ifdef DEBUG_ENCODING
2536 	    xmlGenericError(xmlGenericErrorContext,
2537 		    "output conversion failed by lack of space\n");
2538 #endif
2539 	    break;
2540         case -3:
2541 #ifdef DEBUG_ENCODING
2542 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2543 	            c_in, c_out, (int) xmlBufUse(in));
2544 #endif
2545 	    break;
2546         case -4:
2547             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2548                            "xmlCharEncOutFunc: no output function !\n", NULL);
2549             ret = -1;
2550             break;
2551         case -2: {
2552 	    xmlChar charref[20];
2553 	    int len = (int) xmlBufUse(in);
2554             xmlChar *content = xmlBufContent(in);
2555 	    int cur, charrefLen;
2556 
2557 	    cur = xmlGetUTF8Char(content, &len);
2558 	    if (cur <= 0)
2559                 break;
2560 
2561 #ifdef DEBUG_ENCODING
2562             xmlGenericError(xmlGenericErrorContext,
2563                     "handling output conversion error\n");
2564             xmlGenericError(xmlGenericErrorContext,
2565                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2566                     content[0], content[1],
2567                     content[2], content[3]);
2568 #endif
2569             /*
2570              * Removes the UTF8 sequence, and replace it by a charref
2571              * and continue the transcoding phase, hoping the error
2572              * did not mangle the encoder state.
2573              */
2574             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2575                              "&#%d;", cur);
2576             xmlBufShrink(in, len);
2577             xmlBufGrow(out, charrefLen * 4);
2578             c_out = xmlBufAvail(out) - 1;
2579             c_in = charrefLen;
2580             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2581                                     charref, &c_in);
2582 
2583 	    if ((ret < 0) || (c_in != charrefLen)) {
2584 		char buf[50];
2585 
2586 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2587 			 content[0], content[1],
2588 			 content[2], content[3]);
2589 		buf[49] = 0;
2590 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2591 		    "output conversion failed due to conv error, bytes %s\n",
2592 			       buf);
2593 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2594 		    content[0] = ' ';
2595                 break;
2596 	    }
2597 
2598             xmlBufAddLen(out, c_out);
2599             writtentot += c_out;
2600             goto retry;
2601 	}
2602     }
2603     return(writtentot ? writtentot : ret);
2604 }
2605 #endif
2606 
2607 /**
2608  * xmlCharEncOutFunc:
2609  * @handler:	char encoding transformation data structure
2610  * @out:  an xmlBuffer for the output.
2611  * @in:  an xmlBuffer for the input
2612  *
2613  * Generic front-end for the encoding handler output function
2614  * a first call with @in == NULL has to be made firs to initiate the
2615  * output in case of non-stateless encoding needing to initiate their
2616  * state or the output (like the BOM in UTF16).
2617  * In case of UTF8 sequence conversion errors for the given encoder,
2618  * the content will be automatically remapped to a CharRef sequence.
2619  *
2620  * Returns the number of byte written if success, or
2621  *     -1 general error
2622  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2623  *        the result of transformation can't fit into the encoding we want), or
2624  */
2625 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2626 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2627                   xmlBufferPtr in) {
2628     int ret;
2629     int written;
2630     int writtentot = 0;
2631     int toconv;
2632     int output = 0;
2633 
2634     if (handler == NULL) return(-1);
2635     if (out == NULL) return(-1);
2636 
2637 retry:
2638 
2639     written = out->size - out->use;
2640 
2641     if (written > 0)
2642 	written--; /* Gennady: count '/0' */
2643 
2644     /*
2645      * First specific handling of in = NULL, i.e. the initialization call
2646      */
2647     if (in == NULL) {
2648         toconv = 0;
2649         /* TODO: Check return value. */
2650         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2651                           NULL, &toconv);
2652         out->use += written;
2653         out->content[out->use] = 0;
2654 #ifdef DEBUG_ENCODING
2655 	xmlGenericError(xmlGenericErrorContext,
2656 		"initialized encoder\n");
2657 #endif
2658         return(0);
2659     }
2660 
2661     /*
2662      * Conversion itself.
2663      */
2664     toconv = in->use;
2665     if (toconv == 0)
2666 	return(0);
2667     if (toconv * 4 >= written) {
2668         xmlBufferGrow(out, toconv * 4);
2669 	written = out->size - out->use - 1;
2670     }
2671     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2672                             in->content, &toconv);
2673     xmlBufferShrink(in, toconv);
2674     out->use += written;
2675     writtentot += written;
2676     out->content[out->use] = 0;
2677     if (ret == -1) {
2678         if (written > 0) {
2679             /* Can be a limitation of iconv or uconv */
2680             goto retry;
2681         }
2682         ret = -3;
2683     }
2684 
2685     if (ret >= 0) output += ret;
2686 
2687     /*
2688      * Attempt to handle error cases
2689      */
2690     switch (ret) {
2691         case 0:
2692 #ifdef DEBUG_ENCODING
2693 	    xmlGenericError(xmlGenericErrorContext,
2694 		    "converted %d bytes to %d bytes of output\n",
2695 	            toconv, written);
2696 #endif
2697 	    break;
2698         case -1:
2699 #ifdef DEBUG_ENCODING
2700 	    xmlGenericError(xmlGenericErrorContext,
2701 		    "output conversion failed by lack of space\n");
2702 #endif
2703 	    break;
2704         case -3:
2705 #ifdef DEBUG_ENCODING
2706 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2707 	            toconv, written, in->use);
2708 #endif
2709 	    break;
2710         case -4:
2711 	    xmlEncodingErr(XML_I18N_NO_OUTPUT,
2712 		           "xmlCharEncOutFunc: no output function !\n", NULL);
2713 	    ret = -1;
2714             break;
2715         case -2: {
2716 	    xmlChar charref[20];
2717 	    int len = in->use;
2718 	    const xmlChar *utf = (const xmlChar *) in->content;
2719 	    int cur, charrefLen;
2720 
2721 	    cur = xmlGetUTF8Char(utf, &len);
2722 	    if (cur <= 0)
2723                 break;
2724 
2725 #ifdef DEBUG_ENCODING
2726             xmlGenericError(xmlGenericErrorContext,
2727                     "handling output conversion error\n");
2728             xmlGenericError(xmlGenericErrorContext,
2729                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2730                     in->content[0], in->content[1],
2731                     in->content[2], in->content[3]);
2732 #endif
2733             /*
2734              * Removes the UTF8 sequence, and replace it by a charref
2735              * and continue the transcoding phase, hoping the error
2736              * did not mangle the encoder state.
2737              */
2738             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2739                              "&#%d;", cur);
2740             xmlBufferShrink(in, len);
2741             xmlBufferGrow(out, charrefLen * 4);
2742 	    written = out->size - out->use - 1;
2743             toconv = charrefLen;
2744             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2745                                     charref, &toconv);
2746 
2747 	    if ((ret < 0) || (toconv != charrefLen)) {
2748 		char buf[50];
2749 
2750 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2751 			 in->content[0], in->content[1],
2752 			 in->content[2], in->content[3]);
2753 		buf[49] = 0;
2754 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2755 		    "output conversion failed due to conv error, bytes %s\n",
2756 			       buf);
2757 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2758 		    in->content[0] = ' ';
2759 	        break;
2760 	    }
2761 
2762             out->use += written;
2763             writtentot += written;
2764             out->content[out->use] = 0;
2765             goto retry;
2766 	}
2767     }
2768     return(writtentot ? writtentot : ret);
2769 }
2770 
2771 /**
2772  * xmlCharEncCloseFunc:
2773  * @handler:	char encoding transformation data structure
2774  *
2775  * Generic front-end for encoding handler close function
2776  *
2777  * Returns 0 if success, or -1 in case of error
2778  */
2779 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2780 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2781     int ret = 0;
2782     int tofree = 0;
2783     int i, handler_in_list = 0;
2784 
2785     if (handler == NULL) return(-1);
2786     if (handler->name == NULL) return(-1);
2787     if (handlers != NULL) {
2788         for (i = 0;i < nbCharEncodingHandler; i++) {
2789             if (handler == handlers[i]) {
2790 	        handler_in_list = 1;
2791 		break;
2792 	    }
2793 	}
2794     }
2795 #ifdef LIBXML_ICONV_ENABLED
2796     /*
2797      * Iconv handlers can be used only once, free the whole block.
2798      * and the associated icon resources.
2799      */
2800     if ((handler_in_list == 0) &&
2801         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2802         tofree = 1;
2803 	if (handler->iconv_out != NULL) {
2804 	    if (iconv_close(handler->iconv_out))
2805 		ret = -1;
2806 	    handler->iconv_out = NULL;
2807 	}
2808 	if (handler->iconv_in != NULL) {
2809 	    if (iconv_close(handler->iconv_in))
2810 		ret = -1;
2811 	    handler->iconv_in = NULL;
2812 	}
2813     }
2814 #endif /* LIBXML_ICONV_ENABLED */
2815 #ifdef LIBXML_ICU_ENABLED
2816     if ((handler_in_list == 0) &&
2817         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2818         tofree = 1;
2819 	if (handler->uconv_out != NULL) {
2820 	    closeIcuConverter(handler->uconv_out);
2821 	    handler->uconv_out = NULL;
2822 	}
2823 	if (handler->uconv_in != NULL) {
2824 	    closeIcuConverter(handler->uconv_in);
2825 	    handler->uconv_in = NULL;
2826 	}
2827     }
2828 #endif
2829     if (tofree) {
2830         /* free up only dynamic handlers iconv/uconv */
2831         if (handler->name != NULL)
2832             xmlFree(handler->name);
2833         handler->name = NULL;
2834         xmlFree(handler);
2835     }
2836 #ifdef DEBUG_ENCODING
2837     if (ret)
2838         xmlGenericError(xmlGenericErrorContext,
2839 		"failed to close the encoding handler\n");
2840     else
2841         xmlGenericError(xmlGenericErrorContext,
2842 		"closed the encoding handler\n");
2843 #endif
2844 
2845     return(ret);
2846 }
2847 
2848 /**
2849  * xmlByteConsumed:
2850  * @ctxt: an XML parser context
2851  *
2852  * This function provides the current index of the parser relative
2853  * to the start of the current entity. This function is computed in
2854  * bytes from the beginning starting at zero and finishing at the
2855  * size in byte of the file if parsing a file. The function is
2856  * of constant cost if the input is UTF-8 but can be costly if run
2857  * on non-UTF-8 input.
2858  *
2859  * Returns the index in bytes from the beginning of the entity or -1
2860  *         in case the index could not be computed.
2861  */
2862 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2863 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2864     xmlParserInputPtr in;
2865 
2866     if (ctxt == NULL) return(-1);
2867     in = ctxt->input;
2868     if (in == NULL)  return(-1);
2869     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2870         unsigned int unused = 0;
2871 	xmlCharEncodingHandler * handler = in->buf->encoder;
2872         /*
2873 	 * Encoding conversion, compute the number of unused original
2874 	 * bytes from the input not consumed and subtract that from
2875 	 * the raw consumed value, this is not a cheap operation
2876 	 */
2877         if (in->end - in->cur > 0) {
2878 	    unsigned char convbuf[32000];
2879 	    const unsigned char *cur = (const unsigned char *)in->cur;
2880 	    int toconv = in->end - in->cur, written = 32000;
2881 
2882 	    int ret;
2883 
2884             do {
2885                 toconv = in->end - cur;
2886                 written = 32000;
2887                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2888                                         cur, &toconv);
2889                 if (ret < 0) {
2890                     if (written > 0)
2891                         ret = -2;
2892                     else
2893                         return(-1);
2894                 }
2895                 unused += written;
2896                 cur += toconv;
2897             } while (ret == -2);
2898 	}
2899 	if (in->buf->rawconsumed < unused)
2900 	    return(-1);
2901 	return(in->buf->rawconsumed - unused);
2902     }
2903     return(in->consumed + (in->cur - in->base));
2904 }
2905 
2906 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2907 #ifdef LIBXML_ISO8859X_ENABLED
2908 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Each converted character yields exactly one output
 * byte and at most @outlen bytes are written to @out.
 *
 * Layout of @xlattable as it is indexed here: entries 0..31 hold a block
 * number selected by the low 5 bits of a 2-byte lead, entries 32..47 hold
 * a block number selected by the low 4 bits of a 3-byte lead, and the
 * 64-entry blocks start at offset 48. A final lookup result of 0 means
 * the character is not part of the target character set.
 *
 * If @in is NULL the call is treated as an initialization: both lengths
 * are set to 0 and 0 is returned.
 *
 * Returns the number of bytes written if the whole input was converted,
 *     -1 if an argument is invalid (lengths untouched) or if @out is
 *        full while input remains,
 *     -2 if the input is not valid UTF-8 or a character cannot be
 *        represented in the target character set,
 *     -3 if the input ends in the middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed;
 *     only completely converted characters are counted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
	return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        if (out >= outend) {
            /*
             * Output buffer exhausted: stop on a character boundary and
             * report lack of space so the caller can retry with more room.
             */
            ret = -1;
            break;
        }
        d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC2) {
            /*
             * 0x80..0xBF: trailing byte in leading position
             * 0xC0..0xC1: lead byte of an overlong 2-byte form, never valid
             */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
	    d = d & 0x0F;
	    d = xlattable [48 + c2 + xlattable [48 + c1 +
			xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        /* a whole character was converted, commit the input position */
        processed = in;
    }
    /*
     * On every exit path @out only counts bytes of fully converted
     * characters and @processed points just past the last of them.
     */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3027 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the code point of bytes 0x80..0xFF,
 *                indexed by (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, the others
 * are looked up in @unicodetable and written as 2 or 3 UTF-8 bytes.
 * Conversion stops early, without error, when @out is too small to
 * hold more.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL
 *     (lengths untouched) or if an input byte has no entry in
 *     @unicodetable (lengths describe the progress made so far).
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
	return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: runs only while at least 3 output bytes are free, which
     * is the most a single input byte can expand to. Written as a
     * difference so that no pointer before @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than there is output room for */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds test first: @in may already be at the end of the input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * Up to two output bytes can still be free here; they can only be
     * used for plain ASCII input.
     */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3093 
3094 
3095 /************************************************************************
3096  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3097  ************************************************************************/
3098 
/*
 * ISO-8859-2 -> Unicode table: 128 code points, 8 per row.
 * Presumably entry i is the code point of byte 0x80 + i, matching how
 * ISO8859xToUTF8 indexes its @unicodetable argument -- confirm at the
 * place where this table is passed in.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3117 
/*
 * Unicode -> ISO-8859-2 table: a 48-byte index followed by 6 blocks of
 * 64 bytes, written as a string literal that fills the array exactly
 * (16 bytes per row, no terminating NUL is stored).
 * Presumably consumed as the @xlattable argument of UTF8ToISO8859x,
 * which reads block numbers from the first 48 bytes and treats a final
 * value of 0 as "not in character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3147 
/*
 * ISO-8859-3 -> Unicode table: 128 code points, 8 per row.
 * Presumably entry i is the code point of byte 0x80 + i, matching how
 * ISO8859xToUTF8 indexes its @unicodetable argument -- confirm at the
 * place where this table is passed in. The 0x0000 entries are
 * presumably bytes with no assigned character (ISO8859xToUTF8 reports
 * a table value of 0 as an undefined code point).
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3166 
/*
 * Unicode -> ISO-8859-3 table: a 48-byte index followed by 7 blocks of
 * 64 bytes, written as a string literal that fills the array exactly
 * (16 bytes per row, no terminating NUL is stored).
 * Presumably consumed as the @xlattable argument of UTF8ToISO8859x,
 * which reads block numbers from the first 48 bytes and treats a final
 * value of 0 as "not in character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3200 
/*
 * ISO-8859-4 -> Unicode table: 128 code points, 8 per row.
 * Presumably entry i is the code point of byte 0x80 + i, matching how
 * ISO8859xToUTF8 indexes its @unicodetable argument -- confirm at the
 * place where this table is passed in.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3219 
/*
 * Unicode -> ISO-8859-4 table: a 48-byte index followed by 6 blocks of
 * 64 bytes, written as a string literal that fills the array exactly
 * (16 bytes per row, no terminating NUL is stored).
 * Presumably consumed as the @xlattable argument of UTF8ToISO8859x,
 * which reads block numbers from the first 48 bytes and treats a final
 * value of 0 as "not in character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3249 
/*
 * ISO-8859-5 -> Unicode table: 128 code points, 8 per row.
 * Presumably entry i is the code point of byte 0x80 + i, matching how
 * ISO8859xToUTF8 indexes its @unicodetable argument -- confirm at the
 * place where this table is passed in.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3268 
/*
 * Unicode -> ISO-8859-5 table: a 48-byte index followed by 6 blocks of
 * 64 bytes, written as a string literal that fills the array exactly
 * (16 bytes per row, no terminating NUL is stored).
 * Presumably consumed as the @xlattable argument of UTF8ToISO8859x,
 * which reads block numbers from the first 48 bytes and treats a final
 * value of 0 as "not in character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3298 
/*
 * ISO-8859-6 -> Unicode table: 128 code points, 8 per row.
 * Presumably entry i is the code point of byte 0x80 + i, matching how
 * ISO8859xToUTF8 indexes its @unicodetable argument -- confirm at the
 * place where this table is passed in. The 0x0000 entries are
 * presumably bytes with no assigned character (ISO8859xToUTF8 reports
 * a table value of 0 as an undefined code point).
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
3317 
/*
 * Unicode -> ISO-8859-6 table: a 48-byte index followed by 5 blocks of
 * 64 bytes, written as a string literal that fills the array exactly
 * (16 bytes per row, no terminating NUL is stored).
 * Presumably consumed as the @xlattable argument of UTF8ToISO8859x,
 * which reads block numbers from the first 48 bytes and treats a final
 * value of 0 as "not in character set" -- confirm at the call site.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3343 
/*
 * ISO-8859-7 -> Unicode table: 128 code points, 8 per row.
 * Presumably entry i is the code point of byte 0x80 + i, matching how
 * ISO8859xToUTF8 indexes its @unicodetable argument -- confirm at the
 * place where this table is passed in. The 0x0000 entries are
 * presumably bytes with no assigned character (ISO8859xToUTF8 reports
 * a table value of 0 as an undefined code point).
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
3362 
/*
 * Unicode -> ISO-8859-7 lookup data: a 48-byte index followed by seven
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-7 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3396 
/*
 * ISO-8859-8 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). 0x0000 fills slots without a character.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm how that helper treats 0x0000.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3415 
/*
 * Unicode -> ISO-8859-8 lookup data: a 48-byte index followed by seven
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-8 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3449 
/*
 * ISO-8859-9 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). Every slot is populated.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm in that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3468 
/*
 * Unicode -> ISO-8859-9 lookup data: a 48-byte index followed by five
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * block bytes hold the ISO-8859-9 byte for a character. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3494 
/*
 * ISO-8859-10 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). Every slot is populated.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm in that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3513 
/*
 * Unicode -> ISO-8859-10 lookup data: a 48-byte index followed by seven
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-10 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3547 
/*
 * ISO-8859-11 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). 0x0000 fills slots without a character.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm how that helper treats 0x0000.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3566 
/*
 * Unicode -> ISO-8859-11 lookup data: a 48-byte index followed by six
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-11 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3596 
/*
 * ISO-8859-13 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). Every slot is populated.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm in that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3615 
/*
 * Unicode -> ISO-8859-13 lookup data: a 48-byte index followed by seven
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-13 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3649 
/*
 * ISO-8859-14 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). Every slot is populated.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm in that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3668 
/*
 * Unicode -> ISO-8859-14 lookup data: a 48-byte index followed by ten
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-14 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3714 
/*
 * ISO-8859-15 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). Every slot is populated.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm in that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3733 
/*
 * Unicode -> ISO-8859-15 lookup data: a 48-byte index followed by six
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-15 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3763 
/*
 * ISO-8859-16 -> Unicode table: 128 code points, laid out so that entry i
 * corresponds to byte 0x80 + i (the first 32 entries reproduce
 * U+0080..U+009F). Every slot is populated.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 like the
 * ISO-8859-2..4 tables; confirm in that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3782 
/*
 * Unicode -> ISO-8859-16 lookup data: a 48-byte index followed by nine
 * 64-byte blocks (see the size expression). Index bytes name a block;
 * a block byte is either the ISO-8859-16 byte for a character or, in an
 * intermediate block, the number of a further block. 0x00 appears to
 * mean "no mapping".
 * NOTE(review): the indexing itself is done by UTF8ToISO8859x, which is
 * outside this excerpt -- confirm the details there.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3824 
3825 
3826 /*
3827  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3828  */
3829 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3830 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3831     const unsigned char* in, int *inlen) {
3832     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3833 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3834 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3835     const unsigned char* in, int *inlen) {
3836     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3837 }
3838 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3839 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3840     const unsigned char* in, int *inlen) {
3841     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3842 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3843 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3844     const unsigned char* in, int *inlen) {
3845     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3846 }
3847 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3848 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3849     const unsigned char* in, int *inlen) {
3850     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3851 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3852 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3853     const unsigned char* in, int *inlen) {
3854     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3855 }
3856 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3857 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3858     const unsigned char* in, int *inlen) {
3859     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3860 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3861 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3862     const unsigned char* in, int *inlen) {
3863     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3864 }
3865 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3866 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3867     const unsigned char* in, int *inlen) {
3868     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3869 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3870 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3871     const unsigned char* in, int *inlen) {
3872     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3873 }
3874 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3875 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3876     const unsigned char* in, int *inlen) {
3877     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3878 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3879 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3880     const unsigned char* in, int *inlen) {
3881     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3882 }
3883 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3884 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3885     const unsigned char* in, int *inlen) {
3886     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3887 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3888 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3889     const unsigned char* in, int *inlen) {
3890     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3891 }
3892 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3893 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3894     const unsigned char* in, int *inlen) {
3895     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3896 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3897 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3898     const unsigned char* in, int *inlen) {
3899     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3900 }
3901 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3902 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3903     const unsigned char* in, int *inlen) {
3904     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3905 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3906 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3907     const unsigned char* in, int *inlen) {
3908     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3909 }
3910 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3911 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3912     const unsigned char* in, int *inlen) {
3913     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3914 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3915 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3916     const unsigned char* in, int *inlen) {
3917     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3918 }
3919 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3920 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3921     const unsigned char* in, int *inlen) {
3922     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3923 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3924 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3925     const unsigned char* in, int *inlen) {
3926     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3927 }
3928 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3929 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3930     const unsigned char* in, int *inlen) {
3931     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3932 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3933 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3934     const unsigned char* in, int *inlen) {
3935     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3936 }
3937 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3938 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3939     const unsigned char* in, int *inlen) {
3940     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3941 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3942 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3943     const unsigned char* in, int *inlen) {
3944     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3945 }
3946 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3947 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3948     const unsigned char* in, int *inlen) {
3949     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3950 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3951 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3952     const unsigned char* in, int *inlen) {
3953     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3954 }
3955 
3956 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3957 xmlRegisterCharEncodingHandlersISO8859x (void) {
3958     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3959     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3960     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3961     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3962     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3963     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3964     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3965     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3966     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3967     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3968     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3969     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3970     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3971     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3972 }
3973 
3974 #endif
3975 #endif
3976 
3977 #define bottom_encoding
3978 #include "elfgcchack.h"
3979