1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #include <stddef.h>
34 #include <string.h> /* memcpy */
35
36 #if defined(_MSC_VER) && (_MSC_VER <= 1700)
37 /* for vs2012/11.0/1700 and earlier Visual Studio compilers */
38 # define bool int
39 # define false 0
40 # define true 1
41 #else
42 # include <stdbool.h>
43 #endif
44
45
46 #ifdef _WIN32
47 #include "winconfig.h"
48 #else
49 #ifdef HAVE_EXPAT_CONFIG_H
50 #include <expat_config.h>
51 #endif
52 #endif /* ndef _WIN32 */
53
54 #include "expat_external.h"
55 #include "internal.h"
56 #include "xmltok.h"
57 #include "nametab.h"
58
/* Optional extra entry at the end of the first scanner group: the
   ignore-section tokenizer is listed only when XML_DTD is defined.
*/
#ifdef XML_DTD
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif

/* Initializer fragment for the leading members of an ENCODING.  Every
   entry is spelled through PREFIX(), so the functions that end up in the
   table depend on how PREFIX is defined where the macro is expanded.
*/
#define VTABLE1 \
  { PREFIX(prologTok), \
    PREFIX(contentTok), \
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
  { PREFIX(attributeValueTok), \
    PREFIX(entityValueTok) }, \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

/* VTABLE1 followed by the two converters of the same PREFIX family. */
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
79
/* Test the naming bit of the character whose high byte is hi and whose
   low byte is lo.  pages[hi] selects a group of eight words in
   namingBitmap, the top three bits of lo select the word inside that
   group and the bottom five bits of lo select the bit.  Evaluates to a
   non-zero value when the bit is set.

   Every argument is parenthesized so that expressions such as
   "table + 1" can be passed safely.
*/
#define UCS2_GET_NAMING(pages, hi, lo) \
  (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
82
/* Two-byte UTF-8 sequences carry 11 payload bits: 5 in the first byte
   and 6 in the second.  The top 3 of them index pages, the next 3 are
   added to the (shifted) page value to pick a bitmap word, and the last
   5 pick the bit inside that word.
*/
#define UTF8_GET_NAMING2(pages, byte) \
  (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                + ((((byte)[0]) & 3) << 1) \
                + ((((byte)[1]) >> 5) & 1)] \
   & (1u << (((byte)[1]) & 0x1F)))

/* Three-byte UTF-8 sequences carry 16 payload bits: 4, 6 and 6 in the
   successive bytes.  The top 8 index pages, the next 3 are added to the
   (shifted) page value to pick a bitmap word, and the last 5 pick the
   bit inside that word.
*/
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                         + ((((byte)[1]) >> 2) & 0xF)] \
                 << 3) \
                + ((((byte)[1]) & 3) << 1) \
                + ((((byte)[2]) >> 5) & 1)] \
   & (1u << (((byte)[2]) & 0x1F)))

/* Dispatch on the sequence length n: lengths 2 and 3 are looked up in the
   bitmap, every other length yields 0.  p is reinterpreted as a pointer
   to unsigned bytes before the lookup.
*/
#define UTF8_GET_NAMING(pages, p, n) \
  ((n) == 2 \
   ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
   : ((n) == 3 \
      ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
      : 0))
112
113 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
114 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
115 with the additional restriction of not allowing the Unicode
116 code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
117 Implementation details:
118 (A & 0x80) == 0 means A < 0x80
119 and
120 (A & 0xC0) == 0xC0 means A > 0xBF
121 */
122
/* Each UTF8_INVALIDn(p) takes a pointer to unsigned bytes and evaluates
   to non-zero when the n bytes starting at p are NOT a well-formed
   n-byte UTF-8 sequence.  The argument is always used as (p)[i], so any
   pointer expression (for example "buf + 1") may be passed.
*/

/* Two bytes: lead byte must be at least 0xC2 and the second byte must be
   in 0x80..0xBF.
*/
#define UTF8_INVALID2(p) \
  ((p)[0] < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)

/* Three bytes: the last byte must be in 0x80..0xBF, except that after
   EF,BF it may not exceed 0xBD (rejects U+FFFE and U+FFFF).  The second
   byte must be in 0xA0..0xBF after E0, in 0x80..0x9F after ED, and in
   0x80..0xBF otherwise.
*/
#define UTF8_INVALID3(p) \
  (((p)[2] & 0x80) == 0 \
   || \
   ((p)[0] == 0xEF && (p)[1] == 0xBF \
    ? \
    (p)[2] > 0xBD \
    : \
    ((p)[2] & 0xC0) == 0xC0) \
   || \
   ((p)[0] == 0xE0 \
    ? \
    (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
    : \
    ((p)[1] & 0x80) == 0 \
    || \
    ((p)[0] == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))

/* Four bytes: the last two bytes must be in 0x80..0xBF.  The second byte
   must be in 0x90..0xBF after F0, in 0x80..0x8F after F4, and in
   0x80..0xBF otherwise.
*/
#define UTF8_INVALID4(p) \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
   || \
   ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
   || \
   ((p)[0] == 0xF0 \
    ? \
    (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
    : \
    ((p)[1] & 0x80) == 0 \
    || \
    ((p)[0] == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
155
156 static int PTRFASTCALL
isNever(const ENCODING * UNUSED_P (enc),const char * UNUSED_P (p))157 isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
158 {
159 return 0;
160 }
161
162 static int PTRFASTCALL
utf8_isName2(const ENCODING * UNUSED_P (enc),const char * p)163 utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
164 {
165 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
166 }
167
168 static int PTRFASTCALL
utf8_isName3(const ENCODING * UNUSED_P (enc),const char * p)169 utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
170 {
171 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
172 }
173
174 #define utf8_isName4 isNever
175
176 static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING * UNUSED_P (enc),const char * p)177 utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
178 {
179 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
180 }
181
182 static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING * UNUSED_P (enc),const char * p)183 utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
184 {
185 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
186 }
187
188 #define utf8_isNmstrt4 isNever
189
190 static int PTRFASTCALL
utf8_isInvalid2(const ENCODING * UNUSED_P (enc),const char * p)191 utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
192 {
193 return UTF8_INVALID2((const unsigned char *)p);
194 }
195
196 static int PTRFASTCALL
utf8_isInvalid3(const ENCODING * UNUSED_P (enc),const char * p)197 utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
198 {
199 return UTF8_INVALID3((const unsigned char *)p);
200 }
201
202 static int PTRFASTCALL
utf8_isInvalid4(const ENCODING * UNUSED_P (enc),const char * p)203 utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
204 {
205 return UTF8_INVALID4((const unsigned char *)p);
206 }
207
208 struct normal_encoding {
209 ENCODING enc;
210 unsigned char type[256];
211 #ifdef XML_MIN_SIZE
212 int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
213 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
214 int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
215 int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
216 int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
217 #endif /* XML_MIN_SIZE */
218 int (PTRFASTCALL *isName2)(const ENCODING *, const char *);
219 int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
220 int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
221 int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
222 int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
223 int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
224 int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
225 int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
226 int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
227 };
228
/* View an ENCODING pointer as the struct normal_encoding that embeds it. */
#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc))

/* Initializers for the XML_MIN_SIZE-only members of struct
   normal_encoding, taken from the function family named by prefix E.
   Expands to nothing in other builds.  Note the trailing comma: the
   macro is meant to be followed directly by NORMAL_VTABLE/NULL_VTABLE.
*/
#ifdef XML_MIN_SIZE

#define STANDARD_VTABLE(E) \
  E ## byteType, \
  E ## isNameMin, \
  E ## isNmstrtMin, \
  E ## byteToAscii, \
  E ## charMatches,

#else

#define STANDARD_VTABLE(E) /* as nothing */

#endif

/* Initializers for the nine multi-byte predicates, taken from the
   function family named by prefix E.
*/
#define NORMAL_VTABLE(E) \
  E ## isName2, \
  E ## isName3, \
  E ## isName4, \
  E ## isNmstrt2, \
  E ## isNmstrt3, \
  E ## isNmstrt4, \
  E ## isInvalid2, \
  E ## isInvalid3, \
  E ## isInvalid4

/* Same nine slots, all left NULL. */
#define NULL_VTABLE \
  /* isName2 */ NULL, \
  /* isName3 */ NULL, \
  /* isName4 */ NULL, \
  /* isNmstrt2 */ NULL, \
  /* isNmstrt3 */ NULL, \
  /* isNmstrt4 */ NULL, \
  /* isInvalid2 */ NULL, \
  /* isInvalid3 */ NULL, \
  /* isInvalid4 */ NULL
267
268 static int FASTCALL checkCharRefNumber(int);
269
270 #include "xmltok_impl.h"
271 #include "ascii.h"
272
#ifdef XML_MIN_SIZE
/* The single-byte family has no "minimum size" name characters. */
#define sb_isNameMin isNever
#define sb_isNmstrtMin isNever
/* minimum bytes per character, read from the encoding at run time */
#define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#define MINBPC(enc) 1
#endif

/* Byte type of the single byte at p, read from the type table of the
   struct normal_encoding that enc points into.
*/
#define SB_BYTE_TYPE(enc, p) \
  (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
287
/* BYTE_TYPE(enc, p): byte type of the character at p.  XML_MIN_SIZE
   builds go through the byteType pointer of the encoding (sb_byteType is
   the single-byte implementation); other builds expand the table lookup
   in place.
*/
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p)
{
  return SB_BYTE_TYPE(enc, p);
}
#define BYTE_TYPE(enc, p) \
  (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
299
/* BYTE_TO_ASCII(enc, p): value of the character at p.  XML_MIN_SIZE
   builds go through the byteToAscii pointer of the encoding
   (sb_byteToAscii simply returns the byte); other builds read *p
   directly.
*/
#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
  (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p)
{
  return *p;
}
#else
#define BYTE_TO_ASCII(enc, p) (*(p))
#endif
311
/* Multi-byte character predicates: n (2, 3 or 4) is pasted onto the
   member name, so each use calls the matching function pointer stored in
   the struct normal_encoding behind enc.
*/
#define IS_NAME_CHAR(enc, p, n) \
  (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
  (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
  (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))

/* Minimum-size character predicates: dispatched through the encoding in
   XML_MIN_SIZE builds, constant 0 otherwise.
*/
#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
  (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
  (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif
328
/* CHAR_MATCHES(enc, p, c): non-zero when the character at p equals c.
   XML_MIN_SIZE builds go through the charMatches pointer of the encoding
   (sb_charMatches is the single-byte implementation); other builds
   compare *p in place.  c is parenthesized in the expansion so that any
   expression can be passed for it.
*/
#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
  (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c)
{
  return *p == c;
}
#else
/* c is an ASCII character */
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
341
342 #define PREFIX(ident) normal_ ## ident
343 #define XML_TOK_IMPL_C
344 #include "xmltok_impl.c"
345 #undef XML_TOK_IMPL_C
346
347 #undef MINBPC
348 #undef BYTE_TYPE
349 #undef BYTE_TO_ASCII
350 #undef CHAR_MATCHES
351 #undef IS_NAME_CHAR
352 #undef IS_NAME_CHAR_MINBPC
353 #undef IS_NMSTRT_CHAR
354 #undef IS_NMSTRT_CHAR_MINBPC
355 #undef IS_INVALID_CHAR
356
/* UTF8_cvalN is the marker value OR-ed into the first byte of an N-byte
   UTF-8 sequence.
*/
enum {
  UTF8_cval1 = 0x00,
  UTF8_cval2 = 0xC0,
  UTF8_cval3 = 0xE0,
  UTF8_cval4 = 0xF0
};
363
/* Pull *fromLimRef back, if necessary, so that [from, *fromLimRef) does
   not end in the middle of a multi-byte UTF-8 character.

   The range is scanned backwards from its end.  Continuation bytes are
   stepped over while counting them; when a lead byte is found, the limit
   is restored to just past the character if enough bytes followed the
   lead byte, otherwise the character is dropped and scanning continues.
   A 1-byte (0xxxxxxx) character stops the scan immediately.
*/
void
_INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef)
{
  const char *fromLim = *fromLimRef;
  size_t walked = 0;

  while (fromLim > from) {
    const unsigned char prev = (unsigned char)fromLim[-1];
    size_t seqLen;  /* sequence length announced by prev, 0 if none */

    if ((prev & 0xf8u) == 0xf0u)
      seqLen = 4;   /* lead byte 0b11110xxx */
    else if ((prev & 0xf0u) == 0xe0u)
      seqLen = 3;   /* lead byte 0b1110xxxx */
    else if ((prev & 0xe0u) == 0xc0u)
      seqLen = 2;   /* lead byte 0b110xxxxx */
    else if ((prev & 0x80u) == 0x00u)
      seqLen = 1;   /* plain 0b0xxxxxxx byte */
    else
      seqLen = 0;   /* anything else: keep walking */

    if (seqLen == 1)
      break;

    if (seqLen != 0) {
      if (walked + 1 >= seqLen) {
        /* the whole character is present: keep it */
        fromLim += seqLen - 1;
        break;
      }
      walked = 0;
    }

    fromLim--;
    walked++;
  }

  *fromLimRef = fromLim;
}
398
399 static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING * UNUSED_P (enc),const char ** fromP,const char * fromLim,char ** toP,const char * toLim)400 utf8_toUtf8(const ENCODING *UNUSED_P(enc),
401 const char **fromP, const char *fromLim,
402 char **toP, const char *toLim)
403 {
404 bool input_incomplete = false;
405 bool output_exhausted = false;
406
407 /* Avoid copying partial characters (due to limited space). */
408 const ptrdiff_t bytesAvailable = fromLim - *fromP;
409 const ptrdiff_t bytesStorable = toLim - *toP;
410 if (bytesAvailable > bytesStorable) {
411 fromLim = *fromP + bytesStorable;
412 output_exhausted = true;
413 }
414
415 /* Avoid copying partial characters (from incomplete input). */
416 {
417 const char * const fromLimBefore = fromLim;
418 _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
419 if (fromLim < fromLimBefore) {
420 input_incomplete = true;
421 }
422 }
423
424 {
425 const ptrdiff_t bytesToCopy = fromLim - *fromP;
426 memcpy(*toP, *fromP, bytesToCopy);
427 *fromP += bytesToCopy;
428 *toP += bytesToCopy;
429 }
430
431 if (output_exhausted) /* needs to go first */
432 return XML_CONVERT_OUTPUT_EXHAUSTED;
433 else if (input_incomplete)
434 return XML_CONVERT_INPUT_INCOMPLETE;
435 else
436 return XML_CONVERT_COMPLETED;
437 }
438
439 static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)440 utf8_toUtf16(const ENCODING *enc,
441 const char **fromP, const char *fromLim,
442 unsigned short **toP, const unsigned short *toLim)
443 {
444 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
445 unsigned short *to = *toP;
446 const char *from = *fromP;
447 while (from < fromLim && to < toLim) {
448 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
449 case BT_LEAD2:
450 if (fromLim - from < 2) {
451 res = XML_CONVERT_INPUT_INCOMPLETE;
452 goto after;
453 }
454 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
455 from += 2;
456 break;
457 case BT_LEAD3:
458 if (fromLim - from < 3) {
459 res = XML_CONVERT_INPUT_INCOMPLETE;
460 goto after;
461 }
462 *to++ = (unsigned short)(((from[0] & 0xf) << 12)
463 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
464 from += 3;
465 break;
466 case BT_LEAD4:
467 {
468 unsigned long n;
469 if (toLim - to < 2) {
470 res = XML_CONVERT_OUTPUT_EXHAUSTED;
471 goto after;
472 }
473 if (fromLim - from < 4) {
474 res = XML_CONVERT_INPUT_INCOMPLETE;
475 goto after;
476 }
477 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
478 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
479 n -= 0x10000;
480 to[0] = (unsigned short)((n >> 10) | 0xD800);
481 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
482 to += 2;
483 from += 4;
484 }
485 break;
486 default:
487 *to++ = *from++;
488 break;
489 }
490 }
491 if (from < fromLim)
492 res = XML_CONVERT_OUTPUT_EXHAUSTED;
493 after:
494 *fromP = from;
495 *toP = to;
496 return res;
497 }
498
499 #ifdef XML_NS
500 static const struct normal_encoding utf8_encoding_ns = {
501 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
502 {
503 #include "asciitab.h"
504 #include "utf8tab.h"
505 },
506 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
507 };
508 #endif
509
510 static const struct normal_encoding utf8_encoding = {
511 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
512 {
513 #define BT_COLON BT_NMSTRT
514 #include "asciitab.h"
515 #undef BT_COLON
516 #include "utf8tab.h"
517 },
518 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
519 };
520
521 #ifdef XML_NS
522
523 static const struct normal_encoding internal_utf8_encoding_ns = {
524 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
525 {
526 #include "iasciitab.h"
527 #include "utf8tab.h"
528 },
529 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
530 };
531
532 #endif
533
534 static const struct normal_encoding internal_utf8_encoding = {
535 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
536 {
537 #define BT_COLON BT_NMSTRT
538 #include "iasciitab.h"
539 #undef BT_COLON
540 #include "utf8tab.h"
541 },
542 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
543 };
544
545 static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING * UNUSED_P (enc),const char ** fromP,const char * fromLim,char ** toP,const char * toLim)546 latin1_toUtf8(const ENCODING *UNUSED_P(enc),
547 const char **fromP, const char *fromLim,
548 char **toP, const char *toLim)
549 {
550 for (;;) {
551 unsigned char c;
552 if (*fromP == fromLim)
553 return XML_CONVERT_COMPLETED;
554 c = (unsigned char)**fromP;
555 if (c & 0x80) {
556 if (toLim - *toP < 2)
557 return XML_CONVERT_OUTPUT_EXHAUSTED;
558 *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
559 *(*toP)++ = (char)((c & 0x3f) | 0x80);
560 (*fromP)++;
561 }
562 else {
563 if (*toP == toLim)
564 return XML_CONVERT_OUTPUT_EXHAUSTED;
565 *(*toP)++ = *(*fromP)++;
566 }
567 }
568 }
569
570 static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING * UNUSED_P (enc),const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)571 latin1_toUtf16(const ENCODING *UNUSED_P(enc),
572 const char **fromP, const char *fromLim,
573 unsigned short **toP, const unsigned short *toLim)
574 {
575 while (*fromP < fromLim && *toP < toLim)
576 *(*toP)++ = (unsigned char)*(*fromP)++;
577
578 if ((*toP == toLim) && (*fromP < fromLim))
579 return XML_CONVERT_OUTPUT_EXHAUSTED;
580 else
581 return XML_CONVERT_COMPLETED;
582 }
583
584 #ifdef XML_NS
585
586 static const struct normal_encoding latin1_encoding_ns = {
587 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
588 {
589 #include "asciitab.h"
590 #include "latin1tab.h"
591 },
592 STANDARD_VTABLE(sb_) NULL_VTABLE
593 };
594
595 #endif
596
597 static const struct normal_encoding latin1_encoding = {
598 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
599 {
600 #define BT_COLON BT_NMSTRT
601 #include "asciitab.h"
602 #undef BT_COLON
603 #include "latin1tab.h"
604 },
605 STANDARD_VTABLE(sb_) NULL_VTABLE
606 };
607
608 static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING * UNUSED_P (enc),const char ** fromP,const char * fromLim,char ** toP,const char * toLim)609 ascii_toUtf8(const ENCODING *UNUSED_P(enc),
610 const char **fromP, const char *fromLim,
611 char **toP, const char *toLim)
612 {
613 while (*fromP < fromLim && *toP < toLim)
614 *(*toP)++ = *(*fromP)++;
615
616 if ((*toP == toLim) && (*fromP < fromLim))
617 return XML_CONVERT_OUTPUT_EXHAUSTED;
618 else
619 return XML_CONVERT_COMPLETED;
620 }
621
622 #ifdef XML_NS
623
624 static const struct normal_encoding ascii_encoding_ns = {
625 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
626 {
627 #include "asciitab.h"
628 /* BT_NONXML == 0 */
629 },
630 STANDARD_VTABLE(sb_) NULL_VTABLE
631 };
632
633 #endif
634
635 static const struct normal_encoding ascii_encoding = {
636 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
637 {
638 #define BT_COLON BT_NMSTRT
639 #include "asciitab.h"
640 #undef BT_COLON
641 /* BT_NONXML == 0 */
642 },
643 STANDARD_VTABLE(sb_) NULL_VTABLE
644 };
645
646 static int PTRFASTCALL
unicode_byte_type(char hi,char lo)647 unicode_byte_type(char hi, char lo)
648 {
649 switch ((unsigned char)hi) {
650 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
651 return BT_LEAD4;
652 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
653 return BT_TRAIL;
654 case 0xFF:
655 switch ((unsigned char)lo) {
656 case 0xFF:
657 case 0xFE:
658 return BT_NONXML;
659 }
660 break;
661 }
662 return BT_NONASCII;
663 }
664
/* Define E##toUtf8: convert 2-byte units in [*fromP, fromLim) to UTF-8
   written to [*toP, toLim).  GET_LO/GET_HI, as defined where the macro is
   expanded, select the low and high byte of each unit.  An odd trailing
   input byte is ignored.  Units below 0x80 produce 1 byte, units with a
   high byte below 0x08 produce 2 bytes, a unit with high byte 0xD8..0xDB
   is combined with the following unit into 4 bytes, everything else
   produces 3 bytes.  *toP advances as bytes are written; *fromP is set to
   the first unit that was not converted.

   The generated function returns XML_CONVERT_OUTPUT_EXHAUSTED when the
   next character does not fit, XML_CONVERT_INPUT_INCOMPLETE when the
   second unit of a pair is missing, and XML_CONVERT_COMPLETED otherwise.
*/
#define DEFINE_UTF16_TO_UTF8(E) \
static enum XML_Convert_Result PTRCALL \
E ## toUtf8(const ENCODING *UNUSED_P(enc), \
            const char **fromP, const char *fromLim, \
            char **toP, const char *toLim) \
{ \
  const char *from = *fromP; \
  fromLim = from + (((fromLim - from) >> 1) << 1);  /* shrink to even */ \
  for (; from < fromLim; from += 2) { \
    const unsigned char lo = GET_LO(from); \
    const unsigned char hi = GET_HI(from); \
    if (hi == 0 && lo < 0x80) { \
      /* 7 bits fit into a single byte */ \
      if (*toP == toLim) { \
        *fromP = from; \
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
      } \
      *(*toP)++ = lo; \
    } \
    else if (hi < 0x8) { \
      /* up to 11 bits divided 5, 6 amongst 2 bytes */ \
      if (toLim - *toP < 2) { \
        *fromP = from; \
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
      } \
      *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
    } \
    else if (hi >= 0xD8 && hi <= 0xDB) { \
      /* first unit of a pair: consumes two units, emits 4 bytes */ \
      int plane; \
      unsigned char lo2; \
      if (toLim - *toP < 4) { \
        *fromP = from; \
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
      } \
      if (fromLim - from < 4) { \
        *fromP = from; \
        return XML_CONVERT_INPUT_INCOMPLETE; \
      } \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      from += 2; \
      lo2 = GET_LO(from); \
      *(*toP)++ = (((lo & 0x3) << 4) \
                   | ((GET_HI(from) & 0x3) << 2) \
                   | (lo2 >> 6) \
                   | 0x80); \
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
    } \
    else { \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      if (toLim - *toP < 3) { \
        *fromP = from; \
        return XML_CONVERT_OUTPUT_EXHAUSTED; \
      } \
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
    } \
  } \
  *fromP = from; \
  if (from < fromLim) \
    return XML_CONVERT_INPUT_INCOMPLETE; \
  else \
    return XML_CONVERT_COMPLETED; \
}
736
/* Define E##toUtf16: copy 2-byte units from [*fromP, fromLim) into the
   16-bit units of [*toP, toLim), assembling each value from GET_HI and
   GET_LO as defined where the macro is expanded.  An odd trailing input
   byte is ignored.  When the input is longer than the output can hold
   and its last unit has a high byte in 0xD8..0xDF, that unit is left
   unconverted and XML_CONVERT_INPUT_INCOMPLETE becomes the fallback
   result.

   The generated function returns XML_CONVERT_OUTPUT_EXHAUSTED when the
   output is full while input remains, otherwise the fallback result
   (XML_CONVERT_COMPLETED unless changed as described above).
*/
#define DEFINE_UTF16_TO_UTF16(E) \
static enum XML_Convert_Result PTRCALL \
E ## toUtf16(const ENCODING *UNUSED_P(enc), \
             const char **fromP, const char *fromLim, \
             unsigned short **toP, const unsigned short *toLim) \
{ \
  enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
  fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1);  /* shrink to even */ \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
    fromLim -= 2; \
    res = XML_CONVERT_INPUT_INCOMPLETE; \
  } \
  while (*fromP < fromLim && *toP < toLim) { \
    **toP = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
    ++*toP; \
    *fromP += 2; \
  } \
  if ((*fromP < fromLim) && (*toP == toLim)) \
    return XML_CONVERT_OUTPUT_EXHAUSTED; \
  return res; \
}
758
759 #define SET2(ptr, ch) \
760 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
761 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
762 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
763
764 DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)765 DEFINE_UTF16_TO_UTF16(little2_)
766
767 #undef SET2
768 #undef GET_LO
769 #undef GET_HI
770
771 #define SET2(ptr, ch) \
772 (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
773 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
774 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
775
776 DEFINE_UTF16_TO_UTF8(big2_)
777 DEFINE_UTF16_TO_UTF16(big2_)
778
779 #undef SET2
780 #undef GET_LO
781 #undef GET_HI
782
/* Character access for little-endian 2-byte units: (p)[0] is the low
   byte and (p)[1] the high byte.  All arguments are parenthesized in the
   expansions so that arbitrary expressions can be passed.
*/

/* Byte type: units with a zero high byte use the type table of enc,
   everything else is classified by unicode_byte_type().
*/
#define LITTLE2_BYTE_TYPE(enc, p) \
  ((p)[1] == 0 \
   ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
   : unicode_byte_type((p)[1], (p)[0]))
/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
/* Non-zero when the unit has a zero high byte and its low byte equals c. */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Naming bitmap lookups for the unit at p. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
793
794 #ifdef XML_MIN_SIZE
795
796 static int PTRFASTCALL
797 little2_byteType(const ENCODING *enc, const char *p)
798 {
799 return LITTLE2_BYTE_TYPE(enc, p);
800 }
801
802 static int PTRFASTCALL
little2_byteToAscii(const ENCODING * enc,const char * p)803 little2_byteToAscii(const ENCODING *enc, const char *p)
804 {
805 return LITTLE2_BYTE_TO_ASCII(enc, p);
806 }
807
808 static int PTRCALL
little2_charMatches(const ENCODING * enc,const char * p,int c)809 little2_charMatches(const ENCODING *enc, const char *p, int c)
810 {
811 return LITTLE2_CHAR_MATCHES(enc, p, c);
812 }
813
814 static int PTRFASTCALL
little2_isNameMin(const ENCODING * enc,const char * p)815 little2_isNameMin(const ENCODING *enc, const char *p)
816 {
817 return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
818 }
819
820 static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING * enc,const char * p)821 little2_isNmstrtMin(const ENCODING *enc, const char *p)
822 {
823 return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
824 }
825
826 #undef VTABLE
827 #define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
828
829 #else /* not XML_MIN_SIZE */
830
831 #undef PREFIX
832 #define PREFIX(ident) little2_ ## ident
833 #define MINBPC(enc) 2
834 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
835 #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
836 #define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
837 #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
838 #define IS_NAME_CHAR(enc, p, n) 0
839 #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
840 #define IS_NMSTRT_CHAR(enc, p, n) (0)
841 #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
842
843 #define XML_TOK_IMPL_C
844 #include "xmltok_impl.c"
845 #undef XML_TOK_IMPL_C
846
847 #undef MINBPC
848 #undef BYTE_TYPE
849 #undef BYTE_TO_ASCII
850 #undef CHAR_MATCHES
851 #undef IS_NAME_CHAR
852 #undef IS_NAME_CHAR_MINBPC
853 #undef IS_NMSTRT_CHAR
854 #undef IS_NMSTRT_CHAR_MINBPC
855 #undef IS_INVALID_CHAR
856
857 #endif /* not XML_MIN_SIZE */
858
859 #ifdef XML_NS
860
861 static const struct normal_encoding little2_encoding_ns = {
862 { VTABLE, 2, 0,
863 #if BYTEORDER == 1234
864 1
865 #else
866 0
867 #endif
868 },
869 {
870 #include "asciitab.h"
871 #include "latin1tab.h"
872 },
873 STANDARD_VTABLE(little2_) NULL_VTABLE
874 };
875
876 #endif
877
878 static const struct normal_encoding little2_encoding = {
879 { VTABLE, 2, 0,
880 #if BYTEORDER == 1234
881 1
882 #else
883 0
884 #endif
885 },
886 {
887 #define BT_COLON BT_NMSTRT
888 #include "asciitab.h"
889 #undef BT_COLON
890 #include "latin1tab.h"
891 },
892 STANDARD_VTABLE(little2_) NULL_VTABLE
893 };
894
895 #if BYTEORDER != 4321
896
897 #ifdef XML_NS
898
899 static const struct normal_encoding internal_little2_encoding_ns = {
900 { VTABLE, 2, 0, 1 },
901 {
902 #include "iasciitab.h"
903 #include "latin1tab.h"
904 },
905 STANDARD_VTABLE(little2_) NULL_VTABLE
906 };
907
908 #endif
909
910 static const struct normal_encoding internal_little2_encoding = {
911 { VTABLE, 2, 0, 1 },
912 {
913 #define BT_COLON BT_NMSTRT
914 #include "iasciitab.h"
915 #undef BT_COLON
916 #include "latin1tab.h"
917 },
918 STANDARD_VTABLE(little2_) NULL_VTABLE
919 };
920
921 #endif
922
923
/* Character access for big-endian 2-byte units: (p)[0] is the high byte
   and (p)[1] the low byte.  All arguments are parenthesized in the
   expansions so that arbitrary expressions can be passed.
*/

/* Byte type: units with a zero high byte use the type table of enc,
   everything else is classified by unicode_byte_type().
*/
#define BIG2_BYTE_TYPE(enc, p) \
  ((p)[0] == 0 \
   ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
   : unicode_byte_type((p)[0], (p)[1]))
/* The low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
/* Non-zero when the unit has a zero high byte and its low byte equals c. */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Naming bitmap lookups for the unit at p. */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
934
935 #ifdef XML_MIN_SIZE
936
937 static int PTRFASTCALL
big2_byteType(const ENCODING * enc,const char * p)938 big2_byteType(const ENCODING *enc, const char *p)
939 {
940 return BIG2_BYTE_TYPE(enc, p);
941 }
942
943 static int PTRFASTCALL
big2_byteToAscii(const ENCODING * enc,const char * p)944 big2_byteToAscii(const ENCODING *enc, const char *p)
945 {
946 return BIG2_BYTE_TO_ASCII(enc, p);
947 }
948
949 static int PTRCALL
big2_charMatches(const ENCODING * enc,const char * p,int c)950 big2_charMatches(const ENCODING *enc, const char *p, int c)
951 {
952 return BIG2_CHAR_MATCHES(enc, p, c);
953 }
954
955 static int PTRFASTCALL
big2_isNameMin(const ENCODING * enc,const char * p)956 big2_isNameMin(const ENCODING *enc, const char *p)
957 {
958 return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
959 }
960
961 static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING * enc,const char * p)962 big2_isNmstrtMin(const ENCODING *enc, const char *p)
963 {
964 return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
965 }
966
967 #undef VTABLE
968 #define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
969
970 #else /* not XML_MIN_SIZE */
971
972 #undef PREFIX
973 #define PREFIX(ident) big2_ ## ident
974 #define MINBPC(enc) 2
975 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
976 #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
977 #define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
978 #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
979 #define IS_NAME_CHAR(enc, p, n) 0
980 #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
981 #define IS_NMSTRT_CHAR(enc, p, n) (0)
982 #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
983
984 #define XML_TOK_IMPL_C
985 #include "xmltok_impl.c"
986 #undef XML_TOK_IMPL_C
987
988 #undef MINBPC
989 #undef BYTE_TYPE
990 #undef BYTE_TO_ASCII
991 #undef CHAR_MATCHES
992 #undef IS_NAME_CHAR
993 #undef IS_NAME_CHAR_MINBPC
994 #undef IS_NMSTRT_CHAR
995 #undef IS_NMSTRT_CHAR_MINBPC
996 #undef IS_INVALID_CHAR
997
998 #endif /* not XML_MIN_SIZE */
999
1000 #ifdef XML_NS
1001
1002 static const struct normal_encoding big2_encoding_ns = {
1003 { VTABLE, 2, 0,
1004 #if BYTEORDER == 4321
1005 1
1006 #else
1007 0
1008 #endif
1009 },
1010 {
1011 #include "asciitab.h"
1012 #include "latin1tab.h"
1013 },
1014 STANDARD_VTABLE(big2_) NULL_VTABLE
1015 };
1016
1017 #endif
1018
1019 static const struct normal_encoding big2_encoding = {
1020 { VTABLE, 2, 0,
1021 #if BYTEORDER == 4321
1022 1
1023 #else
1024 0
1025 #endif
1026 },
1027 {
1028 #define BT_COLON BT_NMSTRT
1029 #include "asciitab.h"
1030 #undef BT_COLON
1031 #include "latin1tab.h"
1032 },
1033 STANDARD_VTABLE(big2_) NULL_VTABLE
1034 };
1035
1036 #if BYTEORDER != 1234
1037
1038 #ifdef XML_NS
1039
1040 static const struct normal_encoding internal_big2_encoding_ns = {
1041 { VTABLE, 2, 0, 1 },
1042 {
1043 #include "iasciitab.h"
1044 #include "latin1tab.h"
1045 },
1046 STANDARD_VTABLE(big2_) NULL_VTABLE
1047 };
1048
1049 #endif
1050
1051 static const struct normal_encoding internal_big2_encoding = {
1052 { VTABLE, 2, 0, 1 },
1053 {
1054 #define BT_COLON BT_NMSTRT
1055 #include "iasciitab.h"
1056 #undef BT_COLON
1057 #include "latin1tab.h"
1058 },
1059 STANDARD_VTABLE(big2_) NULL_VTABLE
1060 };
1061
1062 #endif
1063
1064 #undef PREFIX
1065
1066 static int FASTCALL
streqci(const char * s1,const char * s2)1067 streqci(const char *s1, const char *s2)
1068 {
1069 for (;;) {
1070 char c1 = *s1++;
1071 char c2 = *s2++;
1072 if (ASCII_a <= c1 && c1 <= ASCII_z)
1073 c1 += ASCII_A - ASCII_a;
1074 if (ASCII_a <= c2 && c2 <= ASCII_z)
1075 /* The following line will never get executed. streqci() is
1076 * only called from two places, both of which guarantee to put
1077 * upper-case strings into s2.
1078 */
1079 c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
1080 if (c1 != c2)
1081 return 0;
1082 if (!c1)
1083 break;
1084 }
1085 return 1;
1086 }
1087
1088 static void PTRCALL
initUpdatePosition(const ENCODING * UNUSED_P (enc),const char * ptr,const char * end,POSITION * pos)1089 initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
1090 const char *end, POSITION *pos)
1091 {
1092 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
1093 }
1094
1095 static int
toAscii(const ENCODING * enc,const char * ptr,const char * end)1096 toAscii(const ENCODING *enc, const char *ptr, const char *end)
1097 {
1098 char buf[1];
1099 char *p = buf;
1100 XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
1101 if (p == buf)
1102 return -1;
1103 else
1104 return buf[0];
1105 }
1106
1107 static int FASTCALL
isSpace(int c)1108 isSpace(int c)
1109 {
1110 switch (c) {
1111 case 0x20:
1112 case 0xD:
1113 case 0xA:
1114 case 0x9:
1115 return 1;
1116 }
1117 return 0;
1118 }
1119
1120 /* Return 1 if there's just optional white space or there's an S
1121 followed by name=val.
1122 */
1123 static int
parsePseudoAttribute(const ENCODING * enc,const char * ptr,const char * end,const char ** namePtr,const char ** nameEndPtr,const char ** valPtr,const char ** nextTokPtr)1124 parsePseudoAttribute(const ENCODING *enc,
1125 const char *ptr,
1126 const char *end,
1127 const char **namePtr,
1128 const char **nameEndPtr,
1129 const char **valPtr,
1130 const char **nextTokPtr)
1131 {
1132 int c;
1133 char open;
1134 if (ptr == end) {
1135 *namePtr = NULL;
1136 return 1;
1137 }
1138 if (!isSpace(toAscii(enc, ptr, end))) {
1139 *nextTokPtr = ptr;
1140 return 0;
1141 }
1142 do {
1143 ptr += enc->minBytesPerChar;
1144 } while (isSpace(toAscii(enc, ptr, end)));
1145 if (ptr == end) {
1146 *namePtr = NULL;
1147 return 1;
1148 }
1149 *namePtr = ptr;
1150 for (;;) {
1151 c = toAscii(enc, ptr, end);
1152 if (c == -1) {
1153 *nextTokPtr = ptr;
1154 return 0;
1155 }
1156 if (c == ASCII_EQUALS) {
1157 *nameEndPtr = ptr;
1158 break;
1159 }
1160 if (isSpace(c)) {
1161 *nameEndPtr = ptr;
1162 do {
1163 ptr += enc->minBytesPerChar;
1164 } while (isSpace(c = toAscii(enc, ptr, end)));
1165 if (c != ASCII_EQUALS) {
1166 *nextTokPtr = ptr;
1167 return 0;
1168 }
1169 break;
1170 }
1171 ptr += enc->minBytesPerChar;
1172 }
1173 if (ptr == *namePtr) {
1174 *nextTokPtr = ptr;
1175 return 0;
1176 }
1177 ptr += enc->minBytesPerChar;
1178 c = toAscii(enc, ptr, end);
1179 while (isSpace(c)) {
1180 ptr += enc->minBytesPerChar;
1181 c = toAscii(enc, ptr, end);
1182 }
1183 if (c != ASCII_QUOT && c != ASCII_APOS) {
1184 *nextTokPtr = ptr;
1185 return 0;
1186 }
1187 open = (char)c;
1188 ptr += enc->minBytesPerChar;
1189 *valPtr = ptr;
1190 for (;; ptr += enc->minBytesPerChar) {
1191 c = toAscii(enc, ptr, end);
1192 if (c == open)
1193 break;
1194 if (!(ASCII_a <= c && c <= ASCII_z)
1195 && !(ASCII_A <= c && c <= ASCII_Z)
1196 && !(ASCII_0 <= c && c <= ASCII_9)
1197 && c != ASCII_PERIOD
1198 && c != ASCII_MINUS
1199 && c != ASCII_UNDERSCORE) {
1200 *nextTokPtr = ptr;
1201 return 0;
1202 }
1203 }
1204 *nextTokPtr = ptr + enc->minBytesPerChar;
1205 return 1;
1206 }
1207
1208 static const char KW_version[] = {
1209 ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'
1210 };
1211
1212 static const char KW_encoding[] = {
1213 ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'
1214 };
1215
1216 static const char KW_standalone[] = {
1217 ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
1218 ASCII_n, ASCII_e, '\0'
1219 };
1220
1221 static const char KW_yes[] = {
1222 ASCII_y, ASCII_e, ASCII_s, '\0'
1223 };
1224
1225 static const char KW_no[] = {
1226 ASCII_n, ASCII_o, '\0'
1227 };
1228
1229 static int
doParseXmlDecl(const ENCODING * (* encodingFinder)(const ENCODING *,const char *,const char *),int isGeneralTextEntity,const ENCODING * enc,const char * ptr,const char * end,const char ** badPtr,const char ** versionPtr,const char ** versionEndPtr,const char ** encodingName,const ENCODING ** encoding,int * standalone)1230 doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
1231 const char *,
1232 const char *),
1233 int isGeneralTextEntity,
1234 const ENCODING *enc,
1235 const char *ptr,
1236 const char *end,
1237 const char **badPtr,
1238 const char **versionPtr,
1239 const char **versionEndPtr,
1240 const char **encodingName,
1241 const ENCODING **encoding,
1242 int *standalone)
1243 {
1244 const char *val = NULL;
1245 const char *name = NULL;
1246 const char *nameEnd = NULL;
1247 ptr += 5 * enc->minBytesPerChar;
1248 end -= 2 * enc->minBytesPerChar;
1249 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
1250 || !name) {
1251 *badPtr = ptr;
1252 return 0;
1253 }
1254 if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
1255 if (!isGeneralTextEntity) {
1256 *badPtr = name;
1257 return 0;
1258 }
1259 }
1260 else {
1261 if (versionPtr)
1262 *versionPtr = val;
1263 if (versionEndPtr)
1264 *versionEndPtr = ptr;
1265 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1266 *badPtr = ptr;
1267 return 0;
1268 }
1269 if (!name) {
1270 if (isGeneralTextEntity) {
1271 /* a TextDecl must have an EncodingDecl */
1272 *badPtr = ptr;
1273 return 0;
1274 }
1275 return 1;
1276 }
1277 }
1278 if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
1279 int c = toAscii(enc, val, end);
1280 if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
1281 *badPtr = val;
1282 return 0;
1283 }
1284 if (encodingName)
1285 *encodingName = val;
1286 if (encoding)
1287 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
1288 if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1289 *badPtr = ptr;
1290 return 0;
1291 }
1292 if (!name)
1293 return 1;
1294 }
1295 if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
1296 || isGeneralTextEntity) {
1297 *badPtr = name;
1298 return 0;
1299 }
1300 if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
1301 if (standalone)
1302 *standalone = 1;
1303 }
1304 else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
1305 if (standalone)
1306 *standalone = 0;
1307 }
1308 else {
1309 *badPtr = val;
1310 return 0;
1311 }
1312 while (isSpace(toAscii(enc, ptr, end)))
1313 ptr += enc->minBytesPerChar;
1314 if (ptr != end) {
1315 *badPtr = ptr;
1316 return 0;
1317 }
1318 return 1;
1319 }
1320
1321 static int FASTCALL
checkCharRefNumber(int result)1322 checkCharRefNumber(int result)
1323 {
1324 switch (result >> 8) {
1325 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
1326 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
1327 return -1;
1328 case 0:
1329 if (latin1_encoding.type[result] == BT_NONXML)
1330 return -1;
1331 break;
1332 case 0xFF:
1333 if (result == 0xFFFE || result == 0xFFFF)
1334 return -1;
1335 break;
1336 }
1337 return result;
1338 }
1339
1340 int FASTCALL
XmlUtf8Encode(int c,char * buf)1341 XmlUtf8Encode(int c, char *buf)
1342 {
1343 enum {
1344 /* minN is minimum legal resulting value for N byte sequence */
1345 min2 = 0x80,
1346 min3 = 0x800,
1347 min4 = 0x10000
1348 };
1349
1350 if (c < 0)
1351 return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
1352 if (c < min2) {
1353 buf[0] = (char)(c | UTF8_cval1);
1354 return 1;
1355 }
1356 if (c < min3) {
1357 buf[0] = (char)((c >> 6) | UTF8_cval2);
1358 buf[1] = (char)((c & 0x3f) | 0x80);
1359 return 2;
1360 }
1361 if (c < min4) {
1362 buf[0] = (char)((c >> 12) | UTF8_cval3);
1363 buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1364 buf[2] = (char)((c & 0x3f) | 0x80);
1365 return 3;
1366 }
1367 if (c < 0x110000) {
1368 buf[0] = (char)((c >> 18) | UTF8_cval4);
1369 buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1370 buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1371 buf[3] = (char)((c & 0x3f) | 0x80);
1372 return 4;
1373 }
1374 return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
1375 }
1376
1377 int FASTCALL
XmlUtf16Encode(int charNum,unsigned short * buf)1378 XmlUtf16Encode(int charNum, unsigned short *buf)
1379 {
1380 if (charNum < 0)
1381 return 0;
1382 if (charNum < 0x10000) {
1383 buf[0] = (unsigned short)charNum;
1384 return 1;
1385 }
1386 if (charNum < 0x110000) {
1387 charNum -= 0x10000;
1388 buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
1389 buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
1390 return 2;
1391 }
1392 return 0;
1393 }
1394
1395 struct unknown_encoding {
1396 struct normal_encoding normal;
1397 CONVERTER convert;
1398 void *userData;
1399 unsigned short utf16[256];
1400 char utf8[256][4];
1401 };
1402
1403 #define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc))
1404
1405 int
XmlSizeOfUnknownEncoding(void)1406 XmlSizeOfUnknownEncoding(void)
1407 {
1408 return sizeof(struct unknown_encoding);
1409 }
1410
1411 static int PTRFASTCALL
unknown_isName(const ENCODING * enc,const char * p)1412 unknown_isName(const ENCODING *enc, const char *p)
1413 {
1414 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1415 int c = uenc->convert(uenc->userData, p);
1416 if (c & ~0xFFFF)
1417 return 0;
1418 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1419 }
1420
1421 static int PTRFASTCALL
unknown_isNmstrt(const ENCODING * enc,const char * p)1422 unknown_isNmstrt(const ENCODING *enc, const char *p)
1423 {
1424 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1425 int c = uenc->convert(uenc->userData, p);
1426 if (c & ~0xFFFF)
1427 return 0;
1428 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1429 }
1430
1431 static int PTRFASTCALL
unknown_isInvalid(const ENCODING * enc,const char * p)1432 unknown_isInvalid(const ENCODING *enc, const char *p)
1433 {
1434 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1435 int c = uenc->convert(uenc->userData, p);
1436 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1437 }
1438
1439 static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)1440 unknown_toUtf8(const ENCODING *enc,
1441 const char **fromP, const char *fromLim,
1442 char **toP, const char *toLim)
1443 {
1444 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1445 char buf[XML_UTF8_ENCODE_MAX];
1446 for (;;) {
1447 const char *utf8;
1448 int n;
1449 if (*fromP == fromLim)
1450 return XML_CONVERT_COMPLETED;
1451 utf8 = uenc->utf8[(unsigned char)**fromP];
1452 n = *utf8++;
1453 if (n == 0) {
1454 int c = uenc->convert(uenc->userData, *fromP);
1455 n = XmlUtf8Encode(c, buf);
1456 if (n > toLim - *toP)
1457 return XML_CONVERT_OUTPUT_EXHAUSTED;
1458 utf8 = buf;
1459 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1460 - (BT_LEAD2 - 2));
1461 }
1462 else {
1463 if (n > toLim - *toP)
1464 return XML_CONVERT_OUTPUT_EXHAUSTED;
1465 (*fromP)++;
1466 }
1467 memcpy(*toP, utf8, n);
1468 *toP += n;
1469 }
1470 }
1471
1472 static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)1473 unknown_toUtf16(const ENCODING *enc,
1474 const char **fromP, const char *fromLim,
1475 unsigned short **toP, const unsigned short *toLim)
1476 {
1477 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1478 while (*fromP < fromLim && *toP < toLim) {
1479 unsigned short c = uenc->utf16[(unsigned char)**fromP];
1480 if (c == 0) {
1481 c = (unsigned short)
1482 uenc->convert(uenc->userData, *fromP);
1483 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1484 - (BT_LEAD2 - 2));
1485 }
1486 else
1487 (*fromP)++;
1488 *(*toP)++ = c;
1489 }
1490
1491 if ((*toP == toLim) && (*fromP < fromLim))
1492 return XML_CONVERT_OUTPUT_EXHAUSTED;
1493 else
1494 return XML_CONVERT_COMPLETED;
1495 }
1496
1497 ENCODING *
XmlInitUnknownEncoding(void * mem,int * table,CONVERTER convert,void * userData)1498 XmlInitUnknownEncoding(void *mem,
1499 int *table,
1500 CONVERTER convert,
1501 void *userData)
1502 {
1503 int i;
1504 struct unknown_encoding *e = (struct unknown_encoding *)mem;
1505 for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
1506 ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
1507 for (i = 0; i < 128; i++)
1508 if (latin1_encoding.type[i] != BT_OTHER
1509 && latin1_encoding.type[i] != BT_NONXML
1510 && table[i] != i)
1511 return 0;
1512 for (i = 0; i < 256; i++) {
1513 int c = table[i];
1514 if (c == -1) {
1515 e->normal.type[i] = BT_MALFORM;
1516 /* This shouldn't really get used. */
1517 e->utf16[i] = 0xFFFF;
1518 e->utf8[i][0] = 1;
1519 e->utf8[i][1] = 0;
1520 }
1521 else if (c < 0) {
1522 if (c < -4)
1523 return 0;
1524 /* Multi-byte sequences need a converter function */
1525 if (!convert)
1526 return 0;
1527 e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
1528 e->utf8[i][0] = 0;
1529 e->utf16[i] = 0;
1530 }
1531 else if (c < 0x80) {
1532 if (latin1_encoding.type[c] != BT_OTHER
1533 && latin1_encoding.type[c] != BT_NONXML
1534 && c != i)
1535 return 0;
1536 e->normal.type[i] = latin1_encoding.type[c];
1537 e->utf8[i][0] = 1;
1538 e->utf8[i][1] = (char)c;
1539 e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
1540 }
1541 else if (checkCharRefNumber(c) < 0) {
1542 e->normal.type[i] = BT_NONXML;
1543 /* This shouldn't really get used. */
1544 e->utf16[i] = 0xFFFF;
1545 e->utf8[i][0] = 1;
1546 e->utf8[i][1] = 0;
1547 }
1548 else {
1549 if (c > 0xFFFF)
1550 return 0;
1551 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
1552 e->normal.type[i] = BT_NMSTRT;
1553 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
1554 e->normal.type[i] = BT_NAME;
1555 else
1556 e->normal.type[i] = BT_OTHER;
1557 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
1558 e->utf16[i] = (unsigned short)c;
1559 }
1560 }
1561 e->userData = userData;
1562 e->convert = convert;
1563 if (convert) {
1564 e->normal.isName2 = unknown_isName;
1565 e->normal.isName3 = unknown_isName;
1566 e->normal.isName4 = unknown_isName;
1567 e->normal.isNmstrt2 = unknown_isNmstrt;
1568 e->normal.isNmstrt3 = unknown_isNmstrt;
1569 e->normal.isNmstrt4 = unknown_isNmstrt;
1570 e->normal.isInvalid2 = unknown_isInvalid;
1571 e->normal.isInvalid3 = unknown_isInvalid;
1572 e->normal.isInvalid4 = unknown_isInvalid;
1573 }
1574 e->normal.enc.utf8Convert = unknown_toUtf8;
1575 e->normal.enc.utf16Convert = unknown_toUtf16;
1576 return &(e->normal.enc);
1577 }
1578
/* Indices of the built-in encodings.
   If this enumeration is changed, getEncodingIndex and encodings
   must also be changed. */
enum {
  UNKNOWN_ENC = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC = 1,
  UTF_8_ENC = 2,
  UTF_16_ENC = 3,
  UTF_16BE_ENC = 4,
  UTF_16LE_ENC = 5,
  /* must match encodingNames up to here */
  NO_ENC = 6
};
1592
1593 static const char KW_ISO_8859_1[] = {
1594 ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
1595 ASCII_MINUS, ASCII_1, '\0'
1596 };
1597 static const char KW_US_ASCII[] = {
1598 ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
1599 '\0'
1600 };
1601 static const char KW_UTF_8[] = {
1602 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
1603 };
1604 static const char KW_UTF_16[] = {
1605 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
1606 };
1607 static const char KW_UTF_16BE[] = {
1608 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
1609 '\0'
1610 };
1611 static const char KW_UTF_16LE[] = {
1612 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
1613 '\0'
1614 };
1615
1616 static int FASTCALL
getEncodingIndex(const char * name)1617 getEncodingIndex(const char *name)
1618 {
1619 static const char * const encodingNames[] = {
1620 KW_ISO_8859_1,
1621 KW_US_ASCII,
1622 KW_UTF_8,
1623 KW_UTF_16,
1624 KW_UTF_16BE,
1625 KW_UTF_16LE,
1626 };
1627 int i;
1628 if (name == NULL)
1629 return NO_ENC;
1630 for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
1631 if (streqci(name, encodingNames[i]))
1632 return i;
1633 return UNKNOWN_ENC;
1634 }
1635
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX(enc) reads that index back as an int.
   SET_INIT_ENC_INDEX(enc, i) stores i, narrowed to char.  The argument
   is parenthesized so the cast applies to the whole expression passed
   in (e.g. n >> 8), not just to its first operand.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1642
1643 /* This is what detects the encoding. encodingTable maps from
1644 encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
1645 the external (protocol) specified encoding; state is
1646 XML_CONTENT_STATE if we're parsing an external text entity, and
1647 XML_PROLOG_STATE otherwise.
1648 */
1649
1650
1651 static int
initScan(const ENCODING * const * encodingTable,const INIT_ENCODING * enc,int state,const char * ptr,const char * end,const char ** nextTokPtr)1652 initScan(const ENCODING * const *encodingTable,
1653 const INIT_ENCODING *enc,
1654 int state,
1655 const char *ptr,
1656 const char *end,
1657 const char **nextTokPtr)
1658 {
1659 const ENCODING **encPtr;
1660
1661 if (ptr >= end)
1662 return XML_TOK_NONE;
1663 encPtr = enc->encPtr;
1664 if (ptr + 1 == end) {
1665 /* only a single byte available for auto-detection */
1666 #ifndef XML_DTD /* FIXME */
1667 /* a well-formed document entity must have more than one byte */
1668 if (state != XML_CONTENT_STATE)
1669 return XML_TOK_PARTIAL;
1670 #endif
1671 /* so we're parsing an external text entity... */
1672 /* if UTF-16 was externally specified, then we need at least 2 bytes */
1673 switch (INIT_ENC_INDEX(enc)) {
1674 case UTF_16_ENC:
1675 case UTF_16LE_ENC:
1676 case UTF_16BE_ENC:
1677 return XML_TOK_PARTIAL;
1678 }
1679 switch ((unsigned char)*ptr) {
1680 case 0xFE:
1681 case 0xFF:
1682 case 0xEF: /* possibly first byte of UTF-8 BOM */
1683 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1684 && state == XML_CONTENT_STATE)
1685 break;
1686 /* fall through */
1687 case 0x00:
1688 case 0x3C:
1689 return XML_TOK_PARTIAL;
1690 }
1691 }
1692 else {
1693 switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
1694 case 0xFEFF:
1695 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1696 && state == XML_CONTENT_STATE)
1697 break;
1698 *nextTokPtr = ptr + 2;
1699 *encPtr = encodingTable[UTF_16BE_ENC];
1700 return XML_TOK_BOM;
1701 /* 00 3C is handled in the default case */
1702 case 0x3C00:
1703 if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
1704 || INIT_ENC_INDEX(enc) == UTF_16_ENC)
1705 && state == XML_CONTENT_STATE)
1706 break;
1707 *encPtr = encodingTable[UTF_16LE_ENC];
1708 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1709 case 0xFFFE:
1710 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
1711 && state == XML_CONTENT_STATE)
1712 break;
1713 *nextTokPtr = ptr + 2;
1714 *encPtr = encodingTable[UTF_16LE_ENC];
1715 return XML_TOK_BOM;
1716 case 0xEFBB:
1717 /* Maybe a UTF-8 BOM (EF BB BF) */
1718 /* If there's an explicitly specified (external) encoding
1719 of ISO-8859-1 or some flavour of UTF-16
1720 and this is an external text entity,
1721 don't look for the BOM,
1722 because it might be a legal data.
1723 */
1724 if (state == XML_CONTENT_STATE) {
1725 int e = INIT_ENC_INDEX(enc);
1726 if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
1727 || e == UTF_16LE_ENC || e == UTF_16_ENC)
1728 break;
1729 }
1730 if (ptr + 2 == end)
1731 return XML_TOK_PARTIAL;
1732 if ((unsigned char)ptr[2] == 0xBF) {
1733 *nextTokPtr = ptr + 3;
1734 *encPtr = encodingTable[UTF_8_ENC];
1735 return XML_TOK_BOM;
1736 }
1737 break;
1738 default:
1739 if (ptr[0] == '\0') {
1740 /* 0 isn't a legal data character. Furthermore a document
1741 entity can only start with ASCII characters. So the only
1742 way this can fail to be big-endian UTF-16 if it it's an
1743 external parsed general entity that's labelled as
1744 UTF-16LE.
1745 */
1746 if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
1747 break;
1748 *encPtr = encodingTable[UTF_16BE_ENC];
1749 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1750 }
1751 else if (ptr[1] == '\0') {
1752 /* We could recover here in the case:
1753 - parsing an external entity
1754 - second byte is 0
1755 - no externally specified encoding
1756 - no encoding declaration
1757 by assuming UTF-16LE. But we don't, because this would mean when
1758 presented just with a single byte, we couldn't reliably determine
1759 whether we needed further bytes.
1760 */
1761 if (state == XML_CONTENT_STATE)
1762 break;
1763 *encPtr = encodingTable[UTF_16LE_ENC];
1764 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1765 }
1766 break;
1767 }
1768 }
1769 *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
1770 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1771 }
1772
1773
1774 #define NS(x) x
1775 #define ns(x) x
1776 #define XML_TOK_NS_C
1777 #include "xmltok_ns.c"
1778 #undef XML_TOK_NS_C
1779 #undef NS
1780 #undef ns
1781
1782 #ifdef XML_NS
1783
1784 #define NS(x) x ## NS
1785 #define ns(x) x ## _ns
1786
1787 #define XML_TOK_NS_C
1788 #include "xmltok_ns.c"
1789 #undef XML_TOK_NS_C
1790
1791 #undef NS
1792 #undef ns
1793
1794 ENCODING *
XmlInitUnknownEncodingNS(void * mem,int * table,CONVERTER convert,void * userData)1795 XmlInitUnknownEncodingNS(void *mem,
1796 int *table,
1797 CONVERTER convert,
1798 void *userData)
1799 {
1800 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1801 if (enc)
1802 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1803 return enc;
1804 }
1805
1806 #endif /* XML_NS */
1807