1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #include <stddef.h>
34
35 #ifdef _WIN32
36 # include "winconfig.h"
37 #else
38 # ifdef HAVE_EXPAT_CONFIG_H
39 # include <expat_config.h>
40 # endif
41 #endif /* ndef _WIN32 */
42
43 #include "expat_external.h"
44 #include "internal.h"
45 #include "xmlrole.h"
46 #include "ascii.h"
47
/* The handlers in this file do not check:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
54
/* Keyword spellings recognised by the role handlers below.  Each keyword is
 * a NUL-terminated char array built from the ASCII_* constants instead of a
 * string literal, and is handed to XmlNameMatchesAscii() as the name to
 * compare a token against.  KW_IGNORE and KW_INCLUDE only exist when XML_DTD
 * is defined. */
static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
static const char KW_ATTLIST[]
    = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
static const char KW_CDATA[]
    = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_DOCTYPE[]
    = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
static const char KW_ELEMENT[]
    = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
static const char KW_EMPTY[]
    = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
                                   ASCII_I, ASCII_E, ASCII_S, '\0'};
static const char KW_ENTITY[]
    = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
static const char KW_FIXED[]
    = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
static const char KW_IDREF[]
    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
static const char KW_IDREFS[]
    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
#ifdef XML_DTD
static const char KW_IGNORE[]
    = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
#endif
static const char KW_IMPLIED[]
    = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
#ifdef XML_DTD
static const char KW_INCLUDE[]
    = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
#endif
static const char KW_NDATA[]
    = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_NMTOKEN[]
    = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
                                   ASCII_E, ASCII_N, ASCII_S, '\0'};
static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
                                   ASCII_I, ASCII_O, ASCII_N, '\0'};
static const char KW_PCDATA[]
    = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_PUBLIC[]
    = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
                                   ASCII_R, ASCII_E, ASCII_D, '\0'};
static const char KW_SYSTEM[]
    = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
103
/* Reads the minBytesPerChar field of encoding `enc`.  The handlers use it to
 * step over leading characters of a token before comparing a keyword.  The
 * definition is skipped if MIN_BYTES_PER_CHAR was already defined. */
#ifndef MIN_BYTES_PER_CHAR
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* Installs the top-level declaration handler again; the handlers invoke it
 * when a declaration is closed.  With XML_DTD the choice depends on
 * state->documentEntity: internalSubset when it is non-zero, externalSubset1
 * otherwise.  Without XML_DTD it is always internalSubset. */
#ifdef XML_DTD
#  define setTopLevel(state)                                                   \
    ((state)->handler                                                          \
     = ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
#  define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
115
/* Function type shared by every role handler in this file.  A handler gets
 * the prolog state, the token code `tok`, the ptr/end pair that the handlers
 * forward to XmlNameMatchesAscii() together with `enc`, and returns an
 * XML_ROLE_* value. */
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
                                   const char *ptr, const char *end,
                                   const ENCODING *enc);

/* Forward declarations: handlers assign one another to state->handler, so
 * all of them must be declared before the first definition.  The
 * externalSubset* and condSect* handlers exist only when XML_DTD is
 * defined. */
static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
    doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
    entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
    notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
    attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
    attlist9, element0, element1, element2, element3, element4, element5,
    element6, element7,
#ifdef XML_DTD
    externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
#endif /* XML_DTD */
    declClose, error;

/* Fallback every handler calls for a token it does not handle itself. */
static int FASTCALL common(PROLOG_STATE *state, int tok);
133
134 static int PTRCALL
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)135 prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
136 const ENCODING *enc) {
137 switch (tok) {
138 case XML_TOK_PROLOG_S:
139 state->handler = prolog1;
140 return XML_ROLE_NONE;
141 case XML_TOK_XML_DECL:
142 state->handler = prolog1;
143 return XML_ROLE_XML_DECL;
144 case XML_TOK_PI:
145 state->handler = prolog1;
146 return XML_ROLE_PI;
147 case XML_TOK_COMMENT:
148 state->handler = prolog1;
149 return XML_ROLE_COMMENT;
150 case XML_TOK_BOM:
151 return XML_ROLE_NONE;
152 case XML_TOK_DECL_OPEN:
153 if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
154 KW_DOCTYPE))
155 break;
156 state->handler = doctype0;
157 return XML_ROLE_DOCTYPE_NONE;
158 case XML_TOK_INSTANCE_START:
159 state->handler = error;
160 return XML_ROLE_INSTANCE_START;
161 }
162 return common(state, tok);
163 }
164
165 static int PTRCALL
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)166 prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
167 const ENCODING *enc) {
168 switch (tok) {
169 case XML_TOK_PROLOG_S:
170 return XML_ROLE_NONE;
171 case XML_TOK_PI:
172 return XML_ROLE_PI;
173 case XML_TOK_COMMENT:
174 return XML_ROLE_COMMENT;
175 case XML_TOK_BOM:
176 /* This case can never arise. To reach this role function, the
177 * parse must have passed through prolog0 and therefore have had
178 * some form of input, even if only a space. At that point, a
179 * byte order mark is no longer a valid character (though
180 * technically it should be interpreted as a non-breaking space),
181 * so will be rejected by the tokenizing stages.
182 */
183 return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
184 case XML_TOK_DECL_OPEN:
185 if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
186 KW_DOCTYPE))
187 break;
188 state->handler = doctype0;
189 return XML_ROLE_DOCTYPE_NONE;
190 case XML_TOK_INSTANCE_START:
191 state->handler = error;
192 return XML_ROLE_INSTANCE_START;
193 }
194 return common(state, tok);
195 }
196
197 static int PTRCALL
prolog2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)198 prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
199 const ENCODING *enc) {
200 UNUSED_P(ptr);
201 UNUSED_P(end);
202 UNUSED_P(enc);
203 switch (tok) {
204 case XML_TOK_PROLOG_S:
205 return XML_ROLE_NONE;
206 case XML_TOK_PI:
207 return XML_ROLE_PI;
208 case XML_TOK_COMMENT:
209 return XML_ROLE_COMMENT;
210 case XML_TOK_INSTANCE_START:
211 state->handler = error;
212 return XML_ROLE_INSTANCE_START;
213 }
214 return common(state, tok);
215 }
216
217 static int PTRCALL
doctype0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)218 doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
219 const ENCODING *enc) {
220 UNUSED_P(ptr);
221 UNUSED_P(end);
222 UNUSED_P(enc);
223 switch (tok) {
224 case XML_TOK_PROLOG_S:
225 return XML_ROLE_DOCTYPE_NONE;
226 case XML_TOK_NAME:
227 case XML_TOK_PREFIXED_NAME:
228 state->handler = doctype1;
229 return XML_ROLE_DOCTYPE_NAME;
230 }
231 return common(state, tok);
232 }
233
234 static int PTRCALL
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)235 doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
236 const ENCODING *enc) {
237 switch (tok) {
238 case XML_TOK_PROLOG_S:
239 return XML_ROLE_DOCTYPE_NONE;
240 case XML_TOK_OPEN_BRACKET:
241 state->handler = internalSubset;
242 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
243 case XML_TOK_DECL_CLOSE:
244 state->handler = prolog2;
245 return XML_ROLE_DOCTYPE_CLOSE;
246 case XML_TOK_NAME:
247 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
248 state->handler = doctype3;
249 return XML_ROLE_DOCTYPE_NONE;
250 }
251 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
252 state->handler = doctype2;
253 return XML_ROLE_DOCTYPE_NONE;
254 }
255 break;
256 }
257 return common(state, tok);
258 }
259
260 static int PTRCALL
doctype2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)261 doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
262 const ENCODING *enc) {
263 UNUSED_P(ptr);
264 UNUSED_P(end);
265 UNUSED_P(enc);
266 switch (tok) {
267 case XML_TOK_PROLOG_S:
268 return XML_ROLE_DOCTYPE_NONE;
269 case XML_TOK_LITERAL:
270 state->handler = doctype3;
271 return XML_ROLE_DOCTYPE_PUBLIC_ID;
272 }
273 return common(state, tok);
274 }
275
276 static int PTRCALL
doctype3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)277 doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
278 const ENCODING *enc) {
279 UNUSED_P(ptr);
280 UNUSED_P(end);
281 UNUSED_P(enc);
282 switch (tok) {
283 case XML_TOK_PROLOG_S:
284 return XML_ROLE_DOCTYPE_NONE;
285 case XML_TOK_LITERAL:
286 state->handler = doctype4;
287 return XML_ROLE_DOCTYPE_SYSTEM_ID;
288 }
289 return common(state, tok);
290 }
291
292 static int PTRCALL
doctype4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)293 doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
294 const ENCODING *enc) {
295 UNUSED_P(ptr);
296 UNUSED_P(end);
297 UNUSED_P(enc);
298 switch (tok) {
299 case XML_TOK_PROLOG_S:
300 return XML_ROLE_DOCTYPE_NONE;
301 case XML_TOK_OPEN_BRACKET:
302 state->handler = internalSubset;
303 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
304 case XML_TOK_DECL_CLOSE:
305 state->handler = prolog2;
306 return XML_ROLE_DOCTYPE_CLOSE;
307 }
308 return common(state, tok);
309 }
310
311 static int PTRCALL
doctype5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)312 doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
313 const ENCODING *enc) {
314 UNUSED_P(ptr);
315 UNUSED_P(end);
316 UNUSED_P(enc);
317 switch (tok) {
318 case XML_TOK_PROLOG_S:
319 return XML_ROLE_DOCTYPE_NONE;
320 case XML_TOK_DECL_CLOSE:
321 state->handler = prolog2;
322 return XML_ROLE_DOCTYPE_CLOSE;
323 }
324 return common(state, tok);
325 }
326
327 static int PTRCALL
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)328 internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
329 const ENCODING *enc) {
330 switch (tok) {
331 case XML_TOK_PROLOG_S:
332 return XML_ROLE_NONE;
333 case XML_TOK_DECL_OPEN:
334 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
335 KW_ENTITY)) {
336 state->handler = entity0;
337 return XML_ROLE_ENTITY_NONE;
338 }
339 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
340 KW_ATTLIST)) {
341 state->handler = attlist0;
342 return XML_ROLE_ATTLIST_NONE;
343 }
344 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
345 KW_ELEMENT)) {
346 state->handler = element0;
347 return XML_ROLE_ELEMENT_NONE;
348 }
349 if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
350 KW_NOTATION)) {
351 state->handler = notation0;
352 return XML_ROLE_NOTATION_NONE;
353 }
354 break;
355 case XML_TOK_PI:
356 return XML_ROLE_PI;
357 case XML_TOK_COMMENT:
358 return XML_ROLE_COMMENT;
359 case XML_TOK_PARAM_ENTITY_REF:
360 return XML_ROLE_PARAM_ENTITY_REF;
361 case XML_TOK_CLOSE_BRACKET:
362 state->handler = doctype5;
363 return XML_ROLE_DOCTYPE_NONE;
364 case XML_TOK_NONE:
365 return XML_ROLE_NONE;
366 }
367 return common(state, tok);
368 }
369
370 #ifdef XML_DTD
371
372 static int PTRCALL
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)373 externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
374 const ENCODING *enc) {
375 state->handler = externalSubset1;
376 if (tok == XML_TOK_XML_DECL)
377 return XML_ROLE_TEXT_DECL;
378 return externalSubset1(state, tok, ptr, end, enc);
379 }
380
381 static int PTRCALL
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)382 externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
383 const ENCODING *enc) {
384 switch (tok) {
385 case XML_TOK_COND_SECT_OPEN:
386 state->handler = condSect0;
387 return XML_ROLE_NONE;
388 case XML_TOK_COND_SECT_CLOSE:
389 if (state->includeLevel == 0)
390 break;
391 state->includeLevel -= 1;
392 return XML_ROLE_NONE;
393 case XML_TOK_PROLOG_S:
394 return XML_ROLE_NONE;
395 case XML_TOK_CLOSE_BRACKET:
396 break;
397 case XML_TOK_NONE:
398 if (state->includeLevel)
399 break;
400 return XML_ROLE_NONE;
401 default:
402 return internalSubset(state, tok, ptr, end, enc);
403 }
404 return common(state, tok);
405 }
406
407 #endif /* XML_DTD */
408
409 static int PTRCALL
entity0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)410 entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
411 const ENCODING *enc) {
412 UNUSED_P(ptr);
413 UNUSED_P(end);
414 UNUSED_P(enc);
415 switch (tok) {
416 case XML_TOK_PROLOG_S:
417 return XML_ROLE_ENTITY_NONE;
418 case XML_TOK_PERCENT:
419 state->handler = entity1;
420 return XML_ROLE_ENTITY_NONE;
421 case XML_TOK_NAME:
422 state->handler = entity2;
423 return XML_ROLE_GENERAL_ENTITY_NAME;
424 }
425 return common(state, tok);
426 }
427
428 static int PTRCALL
entity1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)429 entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
430 const ENCODING *enc) {
431 UNUSED_P(ptr);
432 UNUSED_P(end);
433 UNUSED_P(enc);
434 switch (tok) {
435 case XML_TOK_PROLOG_S:
436 return XML_ROLE_ENTITY_NONE;
437 case XML_TOK_NAME:
438 state->handler = entity7;
439 return XML_ROLE_PARAM_ENTITY_NAME;
440 }
441 return common(state, tok);
442 }
443
444 static int PTRCALL
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)445 entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
446 const ENCODING *enc) {
447 switch (tok) {
448 case XML_TOK_PROLOG_S:
449 return XML_ROLE_ENTITY_NONE;
450 case XML_TOK_NAME:
451 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
452 state->handler = entity4;
453 return XML_ROLE_ENTITY_NONE;
454 }
455 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
456 state->handler = entity3;
457 return XML_ROLE_ENTITY_NONE;
458 }
459 break;
460 case XML_TOK_LITERAL:
461 state->handler = declClose;
462 state->role_none = XML_ROLE_ENTITY_NONE;
463 return XML_ROLE_ENTITY_VALUE;
464 }
465 return common(state, tok);
466 }
467
468 static int PTRCALL
entity3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)469 entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
470 const ENCODING *enc) {
471 UNUSED_P(ptr);
472 UNUSED_P(end);
473 UNUSED_P(enc);
474 switch (tok) {
475 case XML_TOK_PROLOG_S:
476 return XML_ROLE_ENTITY_NONE;
477 case XML_TOK_LITERAL:
478 state->handler = entity4;
479 return XML_ROLE_ENTITY_PUBLIC_ID;
480 }
481 return common(state, tok);
482 }
483
484 static int PTRCALL
entity4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)485 entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
486 const ENCODING *enc) {
487 UNUSED_P(ptr);
488 UNUSED_P(end);
489 UNUSED_P(enc);
490 switch (tok) {
491 case XML_TOK_PROLOG_S:
492 return XML_ROLE_ENTITY_NONE;
493 case XML_TOK_LITERAL:
494 state->handler = entity5;
495 return XML_ROLE_ENTITY_SYSTEM_ID;
496 }
497 return common(state, tok);
498 }
499
500 static int PTRCALL
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)501 entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
502 const ENCODING *enc) {
503 switch (tok) {
504 case XML_TOK_PROLOG_S:
505 return XML_ROLE_ENTITY_NONE;
506 case XML_TOK_DECL_CLOSE:
507 setTopLevel(state);
508 return XML_ROLE_ENTITY_COMPLETE;
509 case XML_TOK_NAME:
510 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
511 state->handler = entity6;
512 return XML_ROLE_ENTITY_NONE;
513 }
514 break;
515 }
516 return common(state, tok);
517 }
518
519 static int PTRCALL
entity6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)520 entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
521 const ENCODING *enc) {
522 UNUSED_P(ptr);
523 UNUSED_P(end);
524 UNUSED_P(enc);
525 switch (tok) {
526 case XML_TOK_PROLOG_S:
527 return XML_ROLE_ENTITY_NONE;
528 case XML_TOK_NAME:
529 state->handler = declClose;
530 state->role_none = XML_ROLE_ENTITY_NONE;
531 return XML_ROLE_ENTITY_NOTATION_NAME;
532 }
533 return common(state, tok);
534 }
535
536 static int PTRCALL
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)537 entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
538 const ENCODING *enc) {
539 switch (tok) {
540 case XML_TOK_PROLOG_S:
541 return XML_ROLE_ENTITY_NONE;
542 case XML_TOK_NAME:
543 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
544 state->handler = entity9;
545 return XML_ROLE_ENTITY_NONE;
546 }
547 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
548 state->handler = entity8;
549 return XML_ROLE_ENTITY_NONE;
550 }
551 break;
552 case XML_TOK_LITERAL:
553 state->handler = declClose;
554 state->role_none = XML_ROLE_ENTITY_NONE;
555 return XML_ROLE_ENTITY_VALUE;
556 }
557 return common(state, tok);
558 }
559
560 static int PTRCALL
entity8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)561 entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
562 const ENCODING *enc) {
563 UNUSED_P(ptr);
564 UNUSED_P(end);
565 UNUSED_P(enc);
566 switch (tok) {
567 case XML_TOK_PROLOG_S:
568 return XML_ROLE_ENTITY_NONE;
569 case XML_TOK_LITERAL:
570 state->handler = entity9;
571 return XML_ROLE_ENTITY_PUBLIC_ID;
572 }
573 return common(state, tok);
574 }
575
576 static int PTRCALL
entity9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)577 entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
578 const ENCODING *enc) {
579 UNUSED_P(ptr);
580 UNUSED_P(end);
581 UNUSED_P(enc);
582 switch (tok) {
583 case XML_TOK_PROLOG_S:
584 return XML_ROLE_ENTITY_NONE;
585 case XML_TOK_LITERAL:
586 state->handler = entity10;
587 return XML_ROLE_ENTITY_SYSTEM_ID;
588 }
589 return common(state, tok);
590 }
591
592 static int PTRCALL
entity10(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)593 entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
594 const ENCODING *enc) {
595 UNUSED_P(ptr);
596 UNUSED_P(end);
597 UNUSED_P(enc);
598 switch (tok) {
599 case XML_TOK_PROLOG_S:
600 return XML_ROLE_ENTITY_NONE;
601 case XML_TOK_DECL_CLOSE:
602 setTopLevel(state);
603 return XML_ROLE_ENTITY_COMPLETE;
604 }
605 return common(state, tok);
606 }
607
608 static int PTRCALL
notation0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)609 notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
610 const ENCODING *enc) {
611 UNUSED_P(ptr);
612 UNUSED_P(end);
613 UNUSED_P(enc);
614 switch (tok) {
615 case XML_TOK_PROLOG_S:
616 return XML_ROLE_NOTATION_NONE;
617 case XML_TOK_NAME:
618 state->handler = notation1;
619 return XML_ROLE_NOTATION_NAME;
620 }
621 return common(state, tok);
622 }
623
624 static int PTRCALL
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)625 notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
626 const ENCODING *enc) {
627 switch (tok) {
628 case XML_TOK_PROLOG_S:
629 return XML_ROLE_NOTATION_NONE;
630 case XML_TOK_NAME:
631 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
632 state->handler = notation3;
633 return XML_ROLE_NOTATION_NONE;
634 }
635 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
636 state->handler = notation2;
637 return XML_ROLE_NOTATION_NONE;
638 }
639 break;
640 }
641 return common(state, tok);
642 }
643
644 static int PTRCALL
notation2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)645 notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
646 const ENCODING *enc) {
647 UNUSED_P(ptr);
648 UNUSED_P(end);
649 UNUSED_P(enc);
650 switch (tok) {
651 case XML_TOK_PROLOG_S:
652 return XML_ROLE_NOTATION_NONE;
653 case XML_TOK_LITERAL:
654 state->handler = notation4;
655 return XML_ROLE_NOTATION_PUBLIC_ID;
656 }
657 return common(state, tok);
658 }
659
660 static int PTRCALL
notation3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)661 notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
662 const ENCODING *enc) {
663 UNUSED_P(ptr);
664 UNUSED_P(end);
665 UNUSED_P(enc);
666 switch (tok) {
667 case XML_TOK_PROLOG_S:
668 return XML_ROLE_NOTATION_NONE;
669 case XML_TOK_LITERAL:
670 state->handler = declClose;
671 state->role_none = XML_ROLE_NOTATION_NONE;
672 return XML_ROLE_NOTATION_SYSTEM_ID;
673 }
674 return common(state, tok);
675 }
676
677 static int PTRCALL
notation4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)678 notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
679 const ENCODING *enc) {
680 UNUSED_P(ptr);
681 UNUSED_P(end);
682 UNUSED_P(enc);
683 switch (tok) {
684 case XML_TOK_PROLOG_S:
685 return XML_ROLE_NOTATION_NONE;
686 case XML_TOK_LITERAL:
687 state->handler = declClose;
688 state->role_none = XML_ROLE_NOTATION_NONE;
689 return XML_ROLE_NOTATION_SYSTEM_ID;
690 case XML_TOK_DECL_CLOSE:
691 setTopLevel(state);
692 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
693 }
694 return common(state, tok);
695 }
696
697 static int PTRCALL
attlist0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)698 attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
699 const ENCODING *enc) {
700 UNUSED_P(ptr);
701 UNUSED_P(end);
702 UNUSED_P(enc);
703 switch (tok) {
704 case XML_TOK_PROLOG_S:
705 return XML_ROLE_ATTLIST_NONE;
706 case XML_TOK_NAME:
707 case XML_TOK_PREFIXED_NAME:
708 state->handler = attlist1;
709 return XML_ROLE_ATTLIST_ELEMENT_NAME;
710 }
711 return common(state, tok);
712 }
713
714 static int PTRCALL
attlist1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)715 attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
716 const ENCODING *enc) {
717 UNUSED_P(ptr);
718 UNUSED_P(end);
719 UNUSED_P(enc);
720 switch (tok) {
721 case XML_TOK_PROLOG_S:
722 return XML_ROLE_ATTLIST_NONE;
723 case XML_TOK_DECL_CLOSE:
724 setTopLevel(state);
725 return XML_ROLE_ATTLIST_NONE;
726 case XML_TOK_NAME:
727 case XML_TOK_PREFIXED_NAME:
728 state->handler = attlist2;
729 return XML_ROLE_ATTRIBUTE_NAME;
730 }
731 return common(state, tok);
732 }
733
734 static int PTRCALL
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)735 attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
736 const ENCODING *enc) {
737 switch (tok) {
738 case XML_TOK_PROLOG_S:
739 return XML_ROLE_ATTLIST_NONE;
740 case XML_TOK_NAME: {
741 static const char *const types[] = {
742 KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS,
743 KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS,
744 };
745 int i;
746 for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++)
747 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
748 state->handler = attlist8;
749 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
750 }
751 }
752 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
753 state->handler = attlist5;
754 return XML_ROLE_ATTLIST_NONE;
755 }
756 break;
757 case XML_TOK_OPEN_PAREN:
758 state->handler = attlist3;
759 return XML_ROLE_ATTLIST_NONE;
760 }
761 return common(state, tok);
762 }
763
764 static int PTRCALL
attlist3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)765 attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
766 const ENCODING *enc) {
767 UNUSED_P(ptr);
768 UNUSED_P(end);
769 UNUSED_P(enc);
770 switch (tok) {
771 case XML_TOK_PROLOG_S:
772 return XML_ROLE_ATTLIST_NONE;
773 case XML_TOK_NMTOKEN:
774 case XML_TOK_NAME:
775 case XML_TOK_PREFIXED_NAME:
776 state->handler = attlist4;
777 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
778 }
779 return common(state, tok);
780 }
781
782 static int PTRCALL
attlist4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)783 attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
784 const ENCODING *enc) {
785 UNUSED_P(ptr);
786 UNUSED_P(end);
787 UNUSED_P(enc);
788 switch (tok) {
789 case XML_TOK_PROLOG_S:
790 return XML_ROLE_ATTLIST_NONE;
791 case XML_TOK_CLOSE_PAREN:
792 state->handler = attlist8;
793 return XML_ROLE_ATTLIST_NONE;
794 case XML_TOK_OR:
795 state->handler = attlist3;
796 return XML_ROLE_ATTLIST_NONE;
797 }
798 return common(state, tok);
799 }
800
801 static int PTRCALL
attlist5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)802 attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
803 const ENCODING *enc) {
804 UNUSED_P(ptr);
805 UNUSED_P(end);
806 UNUSED_P(enc);
807 switch (tok) {
808 case XML_TOK_PROLOG_S:
809 return XML_ROLE_ATTLIST_NONE;
810 case XML_TOK_OPEN_PAREN:
811 state->handler = attlist6;
812 return XML_ROLE_ATTLIST_NONE;
813 }
814 return common(state, tok);
815 }
816
817 static int PTRCALL
attlist6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)818 attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
819 const ENCODING *enc) {
820 UNUSED_P(ptr);
821 UNUSED_P(end);
822 UNUSED_P(enc);
823 switch (tok) {
824 case XML_TOK_PROLOG_S:
825 return XML_ROLE_ATTLIST_NONE;
826 case XML_TOK_NAME:
827 state->handler = attlist7;
828 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
829 }
830 return common(state, tok);
831 }
832
833 static int PTRCALL
attlist7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)834 attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
835 const ENCODING *enc) {
836 UNUSED_P(ptr);
837 UNUSED_P(end);
838 UNUSED_P(enc);
839 switch (tok) {
840 case XML_TOK_PROLOG_S:
841 return XML_ROLE_ATTLIST_NONE;
842 case XML_TOK_CLOSE_PAREN:
843 state->handler = attlist8;
844 return XML_ROLE_ATTLIST_NONE;
845 case XML_TOK_OR:
846 state->handler = attlist6;
847 return XML_ROLE_ATTLIST_NONE;
848 }
849 return common(state, tok);
850 }
851
852 /* default value */
853 static int PTRCALL
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)854 attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
855 const ENCODING *enc) {
856 switch (tok) {
857 case XML_TOK_PROLOG_S:
858 return XML_ROLE_ATTLIST_NONE;
859 case XML_TOK_POUND_NAME:
860 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
861 KW_IMPLIED)) {
862 state->handler = attlist1;
863 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
864 }
865 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
866 KW_REQUIRED)) {
867 state->handler = attlist1;
868 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
869 }
870 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
871 KW_FIXED)) {
872 state->handler = attlist9;
873 return XML_ROLE_ATTLIST_NONE;
874 }
875 break;
876 case XML_TOK_LITERAL:
877 state->handler = attlist1;
878 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
879 }
880 return common(state, tok);
881 }
882
883 static int PTRCALL
attlist9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)884 attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
885 const ENCODING *enc) {
886 UNUSED_P(ptr);
887 UNUSED_P(end);
888 UNUSED_P(enc);
889 switch (tok) {
890 case XML_TOK_PROLOG_S:
891 return XML_ROLE_ATTLIST_NONE;
892 case XML_TOK_LITERAL:
893 state->handler = attlist1;
894 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
895 }
896 return common(state, tok);
897 }
898
899 static int PTRCALL
element0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)900 element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
901 const ENCODING *enc) {
902 UNUSED_P(ptr);
903 UNUSED_P(end);
904 UNUSED_P(enc);
905 switch (tok) {
906 case XML_TOK_PROLOG_S:
907 return XML_ROLE_ELEMENT_NONE;
908 case XML_TOK_NAME:
909 case XML_TOK_PREFIXED_NAME:
910 state->handler = element1;
911 return XML_ROLE_ELEMENT_NAME;
912 }
913 return common(state, tok);
914 }
915
916 static int PTRCALL
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)917 element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
918 const ENCODING *enc) {
919 switch (tok) {
920 case XML_TOK_PROLOG_S:
921 return XML_ROLE_ELEMENT_NONE;
922 case XML_TOK_NAME:
923 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
924 state->handler = declClose;
925 state->role_none = XML_ROLE_ELEMENT_NONE;
926 return XML_ROLE_CONTENT_EMPTY;
927 }
928 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
929 state->handler = declClose;
930 state->role_none = XML_ROLE_ELEMENT_NONE;
931 return XML_ROLE_CONTENT_ANY;
932 }
933 break;
934 case XML_TOK_OPEN_PAREN:
935 state->handler = element2;
936 state->level = 1;
937 return XML_ROLE_GROUP_OPEN;
938 }
939 return common(state, tok);
940 }
941
942 static int PTRCALL
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)943 element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
944 const ENCODING *enc) {
945 switch (tok) {
946 case XML_TOK_PROLOG_S:
947 return XML_ROLE_ELEMENT_NONE;
948 case XML_TOK_POUND_NAME:
949 if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
950 KW_PCDATA)) {
951 state->handler = element3;
952 return XML_ROLE_CONTENT_PCDATA;
953 }
954 break;
955 case XML_TOK_OPEN_PAREN:
956 state->level = 2;
957 state->handler = element6;
958 return XML_ROLE_GROUP_OPEN;
959 case XML_TOK_NAME:
960 case XML_TOK_PREFIXED_NAME:
961 state->handler = element7;
962 return XML_ROLE_CONTENT_ELEMENT;
963 case XML_TOK_NAME_QUESTION:
964 state->handler = element7;
965 return XML_ROLE_CONTENT_ELEMENT_OPT;
966 case XML_TOK_NAME_ASTERISK:
967 state->handler = element7;
968 return XML_ROLE_CONTENT_ELEMENT_REP;
969 case XML_TOK_NAME_PLUS:
970 state->handler = element7;
971 return XML_ROLE_CONTENT_ELEMENT_PLUS;
972 }
973 return common(state, tok);
974 }
975
976 static int PTRCALL
element3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)977 element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
978 const ENCODING *enc) {
979 UNUSED_P(ptr);
980 UNUSED_P(end);
981 UNUSED_P(enc);
982 switch (tok) {
983 case XML_TOK_PROLOG_S:
984 return XML_ROLE_ELEMENT_NONE;
985 case XML_TOK_CLOSE_PAREN:
986 state->handler = declClose;
987 state->role_none = XML_ROLE_ELEMENT_NONE;
988 return XML_ROLE_GROUP_CLOSE;
989 case XML_TOK_CLOSE_PAREN_ASTERISK:
990 state->handler = declClose;
991 state->role_none = XML_ROLE_ELEMENT_NONE;
992 return XML_ROLE_GROUP_CLOSE_REP;
993 case XML_TOK_OR:
994 state->handler = element4;
995 return XML_ROLE_ELEMENT_NONE;
996 }
997 return common(state, tok);
998 }
999
1000 static int PTRCALL
element4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1001 element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1002 const ENCODING *enc) {
1003 UNUSED_P(ptr);
1004 UNUSED_P(end);
1005 UNUSED_P(enc);
1006 switch (tok) {
1007 case XML_TOK_PROLOG_S:
1008 return XML_ROLE_ELEMENT_NONE;
1009 case XML_TOK_NAME:
1010 case XML_TOK_PREFIXED_NAME:
1011 state->handler = element5;
1012 return XML_ROLE_CONTENT_ELEMENT;
1013 }
1014 return common(state, tok);
1015 }
1016
1017 static int PTRCALL
element5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1018 element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1019 const ENCODING *enc) {
1020 UNUSED_P(ptr);
1021 UNUSED_P(end);
1022 UNUSED_P(enc);
1023 switch (tok) {
1024 case XML_TOK_PROLOG_S:
1025 return XML_ROLE_ELEMENT_NONE;
1026 case XML_TOK_CLOSE_PAREN_ASTERISK:
1027 state->handler = declClose;
1028 state->role_none = XML_ROLE_ELEMENT_NONE;
1029 return XML_ROLE_GROUP_CLOSE_REP;
1030 case XML_TOK_OR:
1031 state->handler = element4;
1032 return XML_ROLE_ELEMENT_NONE;
1033 }
1034 return common(state, tok);
1035 }
1036
1037 static int PTRCALL
element6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1038 element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1039 const ENCODING *enc) {
1040 UNUSED_P(ptr);
1041 UNUSED_P(end);
1042 UNUSED_P(enc);
1043 switch (tok) {
1044 case XML_TOK_PROLOG_S:
1045 return XML_ROLE_ELEMENT_NONE;
1046 case XML_TOK_OPEN_PAREN:
1047 state->level += 1;
1048 return XML_ROLE_GROUP_OPEN;
1049 case XML_TOK_NAME:
1050 case XML_TOK_PREFIXED_NAME:
1051 state->handler = element7;
1052 return XML_ROLE_CONTENT_ELEMENT;
1053 case XML_TOK_NAME_QUESTION:
1054 state->handler = element7;
1055 return XML_ROLE_CONTENT_ELEMENT_OPT;
1056 case XML_TOK_NAME_ASTERISK:
1057 state->handler = element7;
1058 return XML_ROLE_CONTENT_ELEMENT_REP;
1059 case XML_TOK_NAME_PLUS:
1060 state->handler = element7;
1061 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1062 }
1063 return common(state, tok);
1064 }
1065
1066 static int PTRCALL
element7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1067 element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1068 const ENCODING *enc) {
1069 UNUSED_P(ptr);
1070 UNUSED_P(end);
1071 UNUSED_P(enc);
1072 switch (tok) {
1073 case XML_TOK_PROLOG_S:
1074 return XML_ROLE_ELEMENT_NONE;
1075 case XML_TOK_CLOSE_PAREN:
1076 state->level -= 1;
1077 if (state->level == 0) {
1078 state->handler = declClose;
1079 state->role_none = XML_ROLE_ELEMENT_NONE;
1080 }
1081 return XML_ROLE_GROUP_CLOSE;
1082 case XML_TOK_CLOSE_PAREN_ASTERISK:
1083 state->level -= 1;
1084 if (state->level == 0) {
1085 state->handler = declClose;
1086 state->role_none = XML_ROLE_ELEMENT_NONE;
1087 }
1088 return XML_ROLE_GROUP_CLOSE_REP;
1089 case XML_TOK_CLOSE_PAREN_QUESTION:
1090 state->level -= 1;
1091 if (state->level == 0) {
1092 state->handler = declClose;
1093 state->role_none = XML_ROLE_ELEMENT_NONE;
1094 }
1095 return XML_ROLE_GROUP_CLOSE_OPT;
1096 case XML_TOK_CLOSE_PAREN_PLUS:
1097 state->level -= 1;
1098 if (state->level == 0) {
1099 state->handler = declClose;
1100 state->role_none = XML_ROLE_ELEMENT_NONE;
1101 }
1102 return XML_ROLE_GROUP_CLOSE_PLUS;
1103 case XML_TOK_COMMA:
1104 state->handler = element6;
1105 return XML_ROLE_GROUP_SEQUENCE;
1106 case XML_TOK_OR:
1107 state->handler = element6;
1108 return XML_ROLE_GROUP_CHOICE;
1109 }
1110 return common(state, tok);
1111 }
1112
1113 #ifdef XML_DTD
1114
1115 static int PTRCALL
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1116 condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1117 const ENCODING *enc) {
1118 switch (tok) {
1119 case XML_TOK_PROLOG_S:
1120 return XML_ROLE_NONE;
1121 case XML_TOK_NAME:
1122 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1123 state->handler = condSect1;
1124 return XML_ROLE_NONE;
1125 }
1126 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1127 state->handler = condSect2;
1128 return XML_ROLE_NONE;
1129 }
1130 break;
1131 }
1132 return common(state, tok);
1133 }
1134
1135 static int PTRCALL
condSect1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1136 condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1137 const ENCODING *enc) {
1138 UNUSED_P(ptr);
1139 UNUSED_P(end);
1140 UNUSED_P(enc);
1141 switch (tok) {
1142 case XML_TOK_PROLOG_S:
1143 return XML_ROLE_NONE;
1144 case XML_TOK_OPEN_BRACKET:
1145 state->handler = externalSubset1;
1146 state->includeLevel += 1;
1147 return XML_ROLE_NONE;
1148 }
1149 return common(state, tok);
1150 }
1151
1152 static int PTRCALL
condSect2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1153 condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1154 const ENCODING *enc) {
1155 UNUSED_P(ptr);
1156 UNUSED_P(end);
1157 UNUSED_P(enc);
1158 switch (tok) {
1159 case XML_TOK_PROLOG_S:
1160 return XML_ROLE_NONE;
1161 case XML_TOK_OPEN_BRACKET:
1162 state->handler = externalSubset1;
1163 return XML_ROLE_IGNORE_SECT;
1164 }
1165 return common(state, tok);
1166 }
1167
1168 #endif /* XML_DTD */
1169
1170 static int PTRCALL
declClose(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1171 declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1172 const ENCODING *enc) {
1173 UNUSED_P(ptr);
1174 UNUSED_P(end);
1175 UNUSED_P(enc);
1176 switch (tok) {
1177 case XML_TOK_PROLOG_S:
1178 return state->role_none;
1179 case XML_TOK_DECL_CLOSE:
1180 setTopLevel(state);
1181 return state->role_none;
1182 }
1183 return common(state, tok);
1184 }
1185
1186 /* This function will only be invoked if the internal logic of the
1187 * parser has broken down. It is used in two cases:
1188 *
1189 * 1: When the XML prolog has been finished. At this point the
1190 * processor (the parser level above these role handlers) should
1191 * switch from prologProcessor to contentProcessor and reinitialise
1192 * the handler function.
1193 *
1194 * 2: When an error has been detected (via common() below). At this
1195 * point again the processor should be switched to errorProcessor,
1196 * which will never call a handler.
1197 *
1198 * The result of this is that error() can only be called if the
1199 * processor switch failed to happen, which is an internal error and
1200 * therefore we shouldn't be able to provoke it simply by using the
1201 * library. It is a necessary backstop, however, so we merely exclude
1202 * it from the coverage statistics.
1203 *
1204 * LCOV_EXCL_START
1205 */
1206 static int PTRCALL
error(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1207 error(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1208 const ENCODING *enc) {
1209 UNUSED_P(state);
1210 UNUSED_P(tok);
1211 UNUSED_P(ptr);
1212 UNUSED_P(end);
1213 UNUSED_P(enc);
1214 return XML_ROLE_NONE;
1215 }
1216 /* LCOV_EXCL_STOP */
1217
1218 static int FASTCALL
common(PROLOG_STATE * state,int tok)1219 common(PROLOG_STATE *state, int tok) {
1220 #ifdef XML_DTD
1221 if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1222 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1223 #endif
1224 state->handler = error;
1225 return XML_ROLE_ERROR;
1226 }
1227
1228 void
XmlPrologStateInit(PROLOG_STATE * state)1229 XmlPrologStateInit(PROLOG_STATE *state) {
1230 state->handler = prolog0;
1231 #ifdef XML_DTD
1232 state->documentEntity = 1;
1233 state->includeLevel = 0;
1234 state->inEntityValue = 0;
1235 #endif /* XML_DTD */
1236 }
1237
1238 #ifdef XML_DTD
1239
1240 void
XmlPrologStateInitExternalEntity(PROLOG_STATE * state)1241 XmlPrologStateInitExternalEntity(PROLOG_STATE *state) {
1242 state->handler = externalSubset0;
1243 state->documentEntity = 0;
1244 state->includeLevel = 0;
1245 }
1246
1247 #endif /* XML_DTD */
1248