1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #include <stddef.h>
34 
35 #ifdef _WIN32
36 #  include "winconfig.h"
37 #else
38 #  ifdef HAVE_EXPAT_CONFIG_H
39 #    include <expat_config.h>
40 #  endif
41 #endif /* ndef _WIN32 */
42 
43 #include "expat_external.h"
44 #include "internal.h"
45 #include "xmlrole.h"
46 #include "ascii.h"
47 
/* This module does not check:

 - that ',' and '|' are not mixed within a single model group
 - the content of literals

*/
54 
/* Keyword tables.
 *
 * Each KW_* array is a NUL-terminated keyword spelled out one character at
 * a time with the ASCII_* constants.  The role handlers pass them as the
 * final argument of XmlNameMatchesAscii() to recognise keywords in the
 * token stream.
 * NOTE(review): presumably the ASCII_* constants (rather than string
 * literals) are used so that the values are ASCII regardless of the
 * compiler's execution character set -- confirm against ascii.h.
 */
static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'};
static const char KW_ATTLIST[]
    = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'};
static const char KW_CDATA[]
    = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_DOCTYPE[]
    = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'};
static const char KW_ELEMENT[]
    = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'};
static const char KW_EMPTY[]
    = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'};
static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
                                   ASCII_I, ASCII_E, ASCII_S, '\0'};
static const char KW_ENTITY[]
    = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
static const char KW_FIXED[]
    = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'};
static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'};
static const char KW_IDREF[]
    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
static const char KW_IDREFS[]
    = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
/* KW_IGNORE is only compiled when XML_DTD is defined. */
#ifdef XML_DTD
static const char KW_IGNORE[]
    = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'};
#endif
static const char KW_IMPLIED[]
    = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'};
/* KW_INCLUDE is only compiled when XML_DTD is defined. */
#ifdef XML_DTD
static const char KW_INCLUDE[]
    = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'};
#endif
static const char KW_NDATA[]
    = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_NMTOKEN[]
    = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
                                   ASCII_E, ASCII_N, ASCII_S, '\0'};
static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
                                   ASCII_I, ASCII_O, ASCII_N, '\0'};
static const char KW_PCDATA[]
    = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
static const char KW_PUBLIC[]
    = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'};
static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I,
                                   ASCII_R, ASCII_E, ASCII_D, '\0'};
static const char KW_SYSTEM[]
    = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'};
103 
/* Number of bytes used by the smallest character of an encoding.  The
 * default reads the minBytesPerChar field of the ENCODING; a definition
 * supplied before this point takes precedence.
 */
#ifndef MIN_BYTES_PER_CHAR
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* setTopLevel(state) installs the handler used between declarations.
 * With XML_DTD it selects internalSubset when state->documentEntity is
 * non-zero and externalSubset1 otherwise; without XML_DTD it always
 * selects internalSubset.
 */
#ifdef XML_DTD
#  define setTopLevel(state)                                                   \
    ((state)->handler                                                          \
     = ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* not XML_DTD */
#  define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
115 
/* Function type shared by every role handler in this file: it receives the
 * prolog state, the token code, the [ptr, end) extent of the token and the
 * encoding, and returns an int role code (one of the XML_ROLE_* values in
 * the handlers below).
 */
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok,
                                   const char *ptr, const char *end,
                                   const ENCODING *enc);

/* Forward declarations of all role handlers, so that any handler can
 * install any other one in state->handler.  The external-subset and
 * conditional-section handlers exist only when XML_DTD is defined.
 */
static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2,
    doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2,
    entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10,
    notation0, notation1, notation2, notation3, notation4, attlist0, attlist1,
    attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8,
    attlist9, element0, element1, element2, element3, element4, element5,
    element6, element7,
#ifdef XML_DTD
    externalSubset0, externalSubset1, condSect0, condSect1, condSect2,
#endif /* XML_DTD */
    declClose, error;

/* Fallback invoked by the handlers for any token they do not accept. */
static int FASTCALL common(PROLOG_STATE *state, int tok);
133 
134 static int PTRCALL
prolog0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)135 prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
136         const ENCODING *enc) {
137   switch (tok) {
138   case XML_TOK_PROLOG_S:
139     state->handler = prolog1;
140     return XML_ROLE_NONE;
141   case XML_TOK_XML_DECL:
142     state->handler = prolog1;
143     return XML_ROLE_XML_DECL;
144   case XML_TOK_PI:
145     state->handler = prolog1;
146     return XML_ROLE_PI;
147   case XML_TOK_COMMENT:
148     state->handler = prolog1;
149     return XML_ROLE_COMMENT;
150   case XML_TOK_BOM:
151     return XML_ROLE_NONE;
152   case XML_TOK_DECL_OPEN:
153     if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
154                               KW_DOCTYPE))
155       break;
156     state->handler = doctype0;
157     return XML_ROLE_DOCTYPE_NONE;
158   case XML_TOK_INSTANCE_START:
159     state->handler = error;
160     return XML_ROLE_INSTANCE_START;
161   }
162   return common(state, tok);
163 }
164 
165 static int PTRCALL
prolog1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)166 prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
167         const ENCODING *enc) {
168   switch (tok) {
169   case XML_TOK_PROLOG_S:
170     return XML_ROLE_NONE;
171   case XML_TOK_PI:
172     return XML_ROLE_PI;
173   case XML_TOK_COMMENT:
174     return XML_ROLE_COMMENT;
175   case XML_TOK_BOM:
176     /* This case can never arise.  To reach this role function, the
177      * parse must have passed through prolog0 and therefore have had
178      * some form of input, even if only a space.  At that point, a
179      * byte order mark is no longer a valid character (though
180      * technically it should be interpreted as a non-breaking space),
181      * so will be rejected by the tokenizing stages.
182      */
183     return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
184   case XML_TOK_DECL_OPEN:
185     if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
186                               KW_DOCTYPE))
187       break;
188     state->handler = doctype0;
189     return XML_ROLE_DOCTYPE_NONE;
190   case XML_TOK_INSTANCE_START:
191     state->handler = error;
192     return XML_ROLE_INSTANCE_START;
193   }
194   return common(state, tok);
195 }
196 
197 static int PTRCALL
prolog2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)198 prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
199         const ENCODING *enc) {
200   UNUSED_P(ptr);
201   UNUSED_P(end);
202   UNUSED_P(enc);
203   switch (tok) {
204   case XML_TOK_PROLOG_S:
205     return XML_ROLE_NONE;
206   case XML_TOK_PI:
207     return XML_ROLE_PI;
208   case XML_TOK_COMMENT:
209     return XML_ROLE_COMMENT;
210   case XML_TOK_INSTANCE_START:
211     state->handler = error;
212     return XML_ROLE_INSTANCE_START;
213   }
214   return common(state, tok);
215 }
216 
217 static int PTRCALL
doctype0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)218 doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
219          const ENCODING *enc) {
220   UNUSED_P(ptr);
221   UNUSED_P(end);
222   UNUSED_P(enc);
223   switch (tok) {
224   case XML_TOK_PROLOG_S:
225     return XML_ROLE_DOCTYPE_NONE;
226   case XML_TOK_NAME:
227   case XML_TOK_PREFIXED_NAME:
228     state->handler = doctype1;
229     return XML_ROLE_DOCTYPE_NAME;
230   }
231   return common(state, tok);
232 }
233 
234 static int PTRCALL
doctype1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)235 doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
236          const ENCODING *enc) {
237   switch (tok) {
238   case XML_TOK_PROLOG_S:
239     return XML_ROLE_DOCTYPE_NONE;
240   case XML_TOK_OPEN_BRACKET:
241     state->handler = internalSubset;
242     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
243   case XML_TOK_DECL_CLOSE:
244     state->handler = prolog2;
245     return XML_ROLE_DOCTYPE_CLOSE;
246   case XML_TOK_NAME:
247     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
248       state->handler = doctype3;
249       return XML_ROLE_DOCTYPE_NONE;
250     }
251     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
252       state->handler = doctype2;
253       return XML_ROLE_DOCTYPE_NONE;
254     }
255     break;
256   }
257   return common(state, tok);
258 }
259 
260 static int PTRCALL
doctype2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)261 doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
262          const ENCODING *enc) {
263   UNUSED_P(ptr);
264   UNUSED_P(end);
265   UNUSED_P(enc);
266   switch (tok) {
267   case XML_TOK_PROLOG_S:
268     return XML_ROLE_DOCTYPE_NONE;
269   case XML_TOK_LITERAL:
270     state->handler = doctype3;
271     return XML_ROLE_DOCTYPE_PUBLIC_ID;
272   }
273   return common(state, tok);
274 }
275 
276 static int PTRCALL
doctype3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)277 doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
278          const ENCODING *enc) {
279   UNUSED_P(ptr);
280   UNUSED_P(end);
281   UNUSED_P(enc);
282   switch (tok) {
283   case XML_TOK_PROLOG_S:
284     return XML_ROLE_DOCTYPE_NONE;
285   case XML_TOK_LITERAL:
286     state->handler = doctype4;
287     return XML_ROLE_DOCTYPE_SYSTEM_ID;
288   }
289   return common(state, tok);
290 }
291 
292 static int PTRCALL
doctype4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)293 doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
294          const ENCODING *enc) {
295   UNUSED_P(ptr);
296   UNUSED_P(end);
297   UNUSED_P(enc);
298   switch (tok) {
299   case XML_TOK_PROLOG_S:
300     return XML_ROLE_DOCTYPE_NONE;
301   case XML_TOK_OPEN_BRACKET:
302     state->handler = internalSubset;
303     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
304   case XML_TOK_DECL_CLOSE:
305     state->handler = prolog2;
306     return XML_ROLE_DOCTYPE_CLOSE;
307   }
308   return common(state, tok);
309 }
310 
311 static int PTRCALL
doctype5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)312 doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
313          const ENCODING *enc) {
314   UNUSED_P(ptr);
315   UNUSED_P(end);
316   UNUSED_P(enc);
317   switch (tok) {
318   case XML_TOK_PROLOG_S:
319     return XML_ROLE_DOCTYPE_NONE;
320   case XML_TOK_DECL_CLOSE:
321     state->handler = prolog2;
322     return XML_ROLE_DOCTYPE_CLOSE;
323   }
324   return common(state, tok);
325 }
326 
327 static int PTRCALL
internalSubset(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)328 internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
329                const ENCODING *enc) {
330   switch (tok) {
331   case XML_TOK_PROLOG_S:
332     return XML_ROLE_NONE;
333   case XML_TOK_DECL_OPEN:
334     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
335                             KW_ENTITY)) {
336       state->handler = entity0;
337       return XML_ROLE_ENTITY_NONE;
338     }
339     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
340                             KW_ATTLIST)) {
341       state->handler = attlist0;
342       return XML_ROLE_ATTLIST_NONE;
343     }
344     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
345                             KW_ELEMENT)) {
346       state->handler = element0;
347       return XML_ROLE_ELEMENT_NONE;
348     }
349     if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end,
350                             KW_NOTATION)) {
351       state->handler = notation0;
352       return XML_ROLE_NOTATION_NONE;
353     }
354     break;
355   case XML_TOK_PI:
356     return XML_ROLE_PI;
357   case XML_TOK_COMMENT:
358     return XML_ROLE_COMMENT;
359   case XML_TOK_PARAM_ENTITY_REF:
360     return XML_ROLE_PARAM_ENTITY_REF;
361   case XML_TOK_CLOSE_BRACKET:
362     state->handler = doctype5;
363     return XML_ROLE_DOCTYPE_NONE;
364   case XML_TOK_NONE:
365     return XML_ROLE_NONE;
366   }
367   return common(state, tok);
368 }
369 
370 #ifdef XML_DTD
371 
372 static int PTRCALL
externalSubset0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)373 externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
374                 const ENCODING *enc) {
375   state->handler = externalSubset1;
376   if (tok == XML_TOK_XML_DECL)
377     return XML_ROLE_TEXT_DECL;
378   return externalSubset1(state, tok, ptr, end, enc);
379 }
380 
381 static int PTRCALL
externalSubset1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)382 externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
383                 const ENCODING *enc) {
384   switch (tok) {
385   case XML_TOK_COND_SECT_OPEN:
386     state->handler = condSect0;
387     return XML_ROLE_NONE;
388   case XML_TOK_COND_SECT_CLOSE:
389     if (state->includeLevel == 0)
390       break;
391     state->includeLevel -= 1;
392     return XML_ROLE_NONE;
393   case XML_TOK_PROLOG_S:
394     return XML_ROLE_NONE;
395   case XML_TOK_CLOSE_BRACKET:
396     break;
397   case XML_TOK_NONE:
398     if (state->includeLevel)
399       break;
400     return XML_ROLE_NONE;
401   default:
402     return internalSubset(state, tok, ptr, end, enc);
403   }
404   return common(state, tok);
405 }
406 
407 #endif /* XML_DTD */
408 
409 static int PTRCALL
entity0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)410 entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
411         const ENCODING *enc) {
412   UNUSED_P(ptr);
413   UNUSED_P(end);
414   UNUSED_P(enc);
415   switch (tok) {
416   case XML_TOK_PROLOG_S:
417     return XML_ROLE_ENTITY_NONE;
418   case XML_TOK_PERCENT:
419     state->handler = entity1;
420     return XML_ROLE_ENTITY_NONE;
421   case XML_TOK_NAME:
422     state->handler = entity2;
423     return XML_ROLE_GENERAL_ENTITY_NAME;
424   }
425   return common(state, tok);
426 }
427 
428 static int PTRCALL
entity1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)429 entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
430         const ENCODING *enc) {
431   UNUSED_P(ptr);
432   UNUSED_P(end);
433   UNUSED_P(enc);
434   switch (tok) {
435   case XML_TOK_PROLOG_S:
436     return XML_ROLE_ENTITY_NONE;
437   case XML_TOK_NAME:
438     state->handler = entity7;
439     return XML_ROLE_PARAM_ENTITY_NAME;
440   }
441   return common(state, tok);
442 }
443 
444 static int PTRCALL
entity2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)445 entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
446         const ENCODING *enc) {
447   switch (tok) {
448   case XML_TOK_PROLOG_S:
449     return XML_ROLE_ENTITY_NONE;
450   case XML_TOK_NAME:
451     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
452       state->handler = entity4;
453       return XML_ROLE_ENTITY_NONE;
454     }
455     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
456       state->handler = entity3;
457       return XML_ROLE_ENTITY_NONE;
458     }
459     break;
460   case XML_TOK_LITERAL:
461     state->handler = declClose;
462     state->role_none = XML_ROLE_ENTITY_NONE;
463     return XML_ROLE_ENTITY_VALUE;
464   }
465   return common(state, tok);
466 }
467 
468 static int PTRCALL
entity3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)469 entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
470         const ENCODING *enc) {
471   UNUSED_P(ptr);
472   UNUSED_P(end);
473   UNUSED_P(enc);
474   switch (tok) {
475   case XML_TOK_PROLOG_S:
476     return XML_ROLE_ENTITY_NONE;
477   case XML_TOK_LITERAL:
478     state->handler = entity4;
479     return XML_ROLE_ENTITY_PUBLIC_ID;
480   }
481   return common(state, tok);
482 }
483 
484 static int PTRCALL
entity4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)485 entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
486         const ENCODING *enc) {
487   UNUSED_P(ptr);
488   UNUSED_P(end);
489   UNUSED_P(enc);
490   switch (tok) {
491   case XML_TOK_PROLOG_S:
492     return XML_ROLE_ENTITY_NONE;
493   case XML_TOK_LITERAL:
494     state->handler = entity5;
495     return XML_ROLE_ENTITY_SYSTEM_ID;
496   }
497   return common(state, tok);
498 }
499 
500 static int PTRCALL
entity5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)501 entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
502         const ENCODING *enc) {
503   switch (tok) {
504   case XML_TOK_PROLOG_S:
505     return XML_ROLE_ENTITY_NONE;
506   case XML_TOK_DECL_CLOSE:
507     setTopLevel(state);
508     return XML_ROLE_ENTITY_COMPLETE;
509   case XML_TOK_NAME:
510     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
511       state->handler = entity6;
512       return XML_ROLE_ENTITY_NONE;
513     }
514     break;
515   }
516   return common(state, tok);
517 }
518 
519 static int PTRCALL
entity6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)520 entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
521         const ENCODING *enc) {
522   UNUSED_P(ptr);
523   UNUSED_P(end);
524   UNUSED_P(enc);
525   switch (tok) {
526   case XML_TOK_PROLOG_S:
527     return XML_ROLE_ENTITY_NONE;
528   case XML_TOK_NAME:
529     state->handler = declClose;
530     state->role_none = XML_ROLE_ENTITY_NONE;
531     return XML_ROLE_ENTITY_NOTATION_NAME;
532   }
533   return common(state, tok);
534 }
535 
536 static int PTRCALL
entity7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)537 entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
538         const ENCODING *enc) {
539   switch (tok) {
540   case XML_TOK_PROLOG_S:
541     return XML_ROLE_ENTITY_NONE;
542   case XML_TOK_NAME:
543     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
544       state->handler = entity9;
545       return XML_ROLE_ENTITY_NONE;
546     }
547     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
548       state->handler = entity8;
549       return XML_ROLE_ENTITY_NONE;
550     }
551     break;
552   case XML_TOK_LITERAL:
553     state->handler = declClose;
554     state->role_none = XML_ROLE_ENTITY_NONE;
555     return XML_ROLE_ENTITY_VALUE;
556   }
557   return common(state, tok);
558 }
559 
560 static int PTRCALL
entity8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)561 entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
562         const ENCODING *enc) {
563   UNUSED_P(ptr);
564   UNUSED_P(end);
565   UNUSED_P(enc);
566   switch (tok) {
567   case XML_TOK_PROLOG_S:
568     return XML_ROLE_ENTITY_NONE;
569   case XML_TOK_LITERAL:
570     state->handler = entity9;
571     return XML_ROLE_ENTITY_PUBLIC_ID;
572   }
573   return common(state, tok);
574 }
575 
576 static int PTRCALL
entity9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)577 entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
578         const ENCODING *enc) {
579   UNUSED_P(ptr);
580   UNUSED_P(end);
581   UNUSED_P(enc);
582   switch (tok) {
583   case XML_TOK_PROLOG_S:
584     return XML_ROLE_ENTITY_NONE;
585   case XML_TOK_LITERAL:
586     state->handler = entity10;
587     return XML_ROLE_ENTITY_SYSTEM_ID;
588   }
589   return common(state, tok);
590 }
591 
592 static int PTRCALL
entity10(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)593 entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
594          const ENCODING *enc) {
595   UNUSED_P(ptr);
596   UNUSED_P(end);
597   UNUSED_P(enc);
598   switch (tok) {
599   case XML_TOK_PROLOG_S:
600     return XML_ROLE_ENTITY_NONE;
601   case XML_TOK_DECL_CLOSE:
602     setTopLevel(state);
603     return XML_ROLE_ENTITY_COMPLETE;
604   }
605   return common(state, tok);
606 }
607 
608 static int PTRCALL
notation0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)609 notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
610           const ENCODING *enc) {
611   UNUSED_P(ptr);
612   UNUSED_P(end);
613   UNUSED_P(enc);
614   switch (tok) {
615   case XML_TOK_PROLOG_S:
616     return XML_ROLE_NOTATION_NONE;
617   case XML_TOK_NAME:
618     state->handler = notation1;
619     return XML_ROLE_NOTATION_NAME;
620   }
621   return common(state, tok);
622 }
623 
624 static int PTRCALL
notation1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)625 notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
626           const ENCODING *enc) {
627   switch (tok) {
628   case XML_TOK_PROLOG_S:
629     return XML_ROLE_NOTATION_NONE;
630   case XML_TOK_NAME:
631     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
632       state->handler = notation3;
633       return XML_ROLE_NOTATION_NONE;
634     }
635     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
636       state->handler = notation2;
637       return XML_ROLE_NOTATION_NONE;
638     }
639     break;
640   }
641   return common(state, tok);
642 }
643 
644 static int PTRCALL
notation2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)645 notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
646           const ENCODING *enc) {
647   UNUSED_P(ptr);
648   UNUSED_P(end);
649   UNUSED_P(enc);
650   switch (tok) {
651   case XML_TOK_PROLOG_S:
652     return XML_ROLE_NOTATION_NONE;
653   case XML_TOK_LITERAL:
654     state->handler = notation4;
655     return XML_ROLE_NOTATION_PUBLIC_ID;
656   }
657   return common(state, tok);
658 }
659 
660 static int PTRCALL
notation3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)661 notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
662           const ENCODING *enc) {
663   UNUSED_P(ptr);
664   UNUSED_P(end);
665   UNUSED_P(enc);
666   switch (tok) {
667   case XML_TOK_PROLOG_S:
668     return XML_ROLE_NOTATION_NONE;
669   case XML_TOK_LITERAL:
670     state->handler = declClose;
671     state->role_none = XML_ROLE_NOTATION_NONE;
672     return XML_ROLE_NOTATION_SYSTEM_ID;
673   }
674   return common(state, tok);
675 }
676 
677 static int PTRCALL
notation4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)678 notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
679           const ENCODING *enc) {
680   UNUSED_P(ptr);
681   UNUSED_P(end);
682   UNUSED_P(enc);
683   switch (tok) {
684   case XML_TOK_PROLOG_S:
685     return XML_ROLE_NOTATION_NONE;
686   case XML_TOK_LITERAL:
687     state->handler = declClose;
688     state->role_none = XML_ROLE_NOTATION_NONE;
689     return XML_ROLE_NOTATION_SYSTEM_ID;
690   case XML_TOK_DECL_CLOSE:
691     setTopLevel(state);
692     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
693   }
694   return common(state, tok);
695 }
696 
697 static int PTRCALL
attlist0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)698 attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
699          const ENCODING *enc) {
700   UNUSED_P(ptr);
701   UNUSED_P(end);
702   UNUSED_P(enc);
703   switch (tok) {
704   case XML_TOK_PROLOG_S:
705     return XML_ROLE_ATTLIST_NONE;
706   case XML_TOK_NAME:
707   case XML_TOK_PREFIXED_NAME:
708     state->handler = attlist1;
709     return XML_ROLE_ATTLIST_ELEMENT_NAME;
710   }
711   return common(state, tok);
712 }
713 
714 static int PTRCALL
attlist1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)715 attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
716          const ENCODING *enc) {
717   UNUSED_P(ptr);
718   UNUSED_P(end);
719   UNUSED_P(enc);
720   switch (tok) {
721   case XML_TOK_PROLOG_S:
722     return XML_ROLE_ATTLIST_NONE;
723   case XML_TOK_DECL_CLOSE:
724     setTopLevel(state);
725     return XML_ROLE_ATTLIST_NONE;
726   case XML_TOK_NAME:
727   case XML_TOK_PREFIXED_NAME:
728     state->handler = attlist2;
729     return XML_ROLE_ATTRIBUTE_NAME;
730   }
731   return common(state, tok);
732 }
733 
734 static int PTRCALL
attlist2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)735 attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
736          const ENCODING *enc) {
737   switch (tok) {
738   case XML_TOK_PROLOG_S:
739     return XML_ROLE_ATTLIST_NONE;
740   case XML_TOK_NAME: {
741     static const char *const types[] = {
742         KW_CDATA,  KW_ID,       KW_IDREF,   KW_IDREFS,
743         KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS,
744     };
745     int i;
746     for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++)
747       if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
748         state->handler = attlist8;
749         return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
750       }
751   }
752     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
753       state->handler = attlist5;
754       return XML_ROLE_ATTLIST_NONE;
755     }
756     break;
757   case XML_TOK_OPEN_PAREN:
758     state->handler = attlist3;
759     return XML_ROLE_ATTLIST_NONE;
760   }
761   return common(state, tok);
762 }
763 
764 static int PTRCALL
attlist3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)765 attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
766          const ENCODING *enc) {
767   UNUSED_P(ptr);
768   UNUSED_P(end);
769   UNUSED_P(enc);
770   switch (tok) {
771   case XML_TOK_PROLOG_S:
772     return XML_ROLE_ATTLIST_NONE;
773   case XML_TOK_NMTOKEN:
774   case XML_TOK_NAME:
775   case XML_TOK_PREFIXED_NAME:
776     state->handler = attlist4;
777     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
778   }
779   return common(state, tok);
780 }
781 
782 static int PTRCALL
attlist4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)783 attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
784          const ENCODING *enc) {
785   UNUSED_P(ptr);
786   UNUSED_P(end);
787   UNUSED_P(enc);
788   switch (tok) {
789   case XML_TOK_PROLOG_S:
790     return XML_ROLE_ATTLIST_NONE;
791   case XML_TOK_CLOSE_PAREN:
792     state->handler = attlist8;
793     return XML_ROLE_ATTLIST_NONE;
794   case XML_TOK_OR:
795     state->handler = attlist3;
796     return XML_ROLE_ATTLIST_NONE;
797   }
798   return common(state, tok);
799 }
800 
801 static int PTRCALL
attlist5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)802 attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
803          const ENCODING *enc) {
804   UNUSED_P(ptr);
805   UNUSED_P(end);
806   UNUSED_P(enc);
807   switch (tok) {
808   case XML_TOK_PROLOG_S:
809     return XML_ROLE_ATTLIST_NONE;
810   case XML_TOK_OPEN_PAREN:
811     state->handler = attlist6;
812     return XML_ROLE_ATTLIST_NONE;
813   }
814   return common(state, tok);
815 }
816 
817 static int PTRCALL
attlist6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)818 attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
819          const ENCODING *enc) {
820   UNUSED_P(ptr);
821   UNUSED_P(end);
822   UNUSED_P(enc);
823   switch (tok) {
824   case XML_TOK_PROLOG_S:
825     return XML_ROLE_ATTLIST_NONE;
826   case XML_TOK_NAME:
827     state->handler = attlist7;
828     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
829   }
830   return common(state, tok);
831 }
832 
833 static int PTRCALL
attlist7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)834 attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
835          const ENCODING *enc) {
836   UNUSED_P(ptr);
837   UNUSED_P(end);
838   UNUSED_P(enc);
839   switch (tok) {
840   case XML_TOK_PROLOG_S:
841     return XML_ROLE_ATTLIST_NONE;
842   case XML_TOK_CLOSE_PAREN:
843     state->handler = attlist8;
844     return XML_ROLE_ATTLIST_NONE;
845   case XML_TOK_OR:
846     state->handler = attlist6;
847     return XML_ROLE_ATTLIST_NONE;
848   }
849   return common(state, tok);
850 }
851 
852 /* default value */
853 static int PTRCALL
attlist8(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)854 attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
855          const ENCODING *enc) {
856   switch (tok) {
857   case XML_TOK_PROLOG_S:
858     return XML_ROLE_ATTLIST_NONE;
859   case XML_TOK_POUND_NAME:
860     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
861                             KW_IMPLIED)) {
862       state->handler = attlist1;
863       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
864     }
865     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
866                             KW_REQUIRED)) {
867       state->handler = attlist1;
868       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
869     }
870     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
871                             KW_FIXED)) {
872       state->handler = attlist9;
873       return XML_ROLE_ATTLIST_NONE;
874     }
875     break;
876   case XML_TOK_LITERAL:
877     state->handler = attlist1;
878     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
879   }
880   return common(state, tok);
881 }
882 
883 static int PTRCALL
attlist9(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)884 attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
885          const ENCODING *enc) {
886   UNUSED_P(ptr);
887   UNUSED_P(end);
888   UNUSED_P(enc);
889   switch (tok) {
890   case XML_TOK_PROLOG_S:
891     return XML_ROLE_ATTLIST_NONE;
892   case XML_TOK_LITERAL:
893     state->handler = attlist1;
894     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
895   }
896   return common(state, tok);
897 }
898 
899 static int PTRCALL
element0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)900 element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
901          const ENCODING *enc) {
902   UNUSED_P(ptr);
903   UNUSED_P(end);
904   UNUSED_P(enc);
905   switch (tok) {
906   case XML_TOK_PROLOG_S:
907     return XML_ROLE_ELEMENT_NONE;
908   case XML_TOK_NAME:
909   case XML_TOK_PREFIXED_NAME:
910     state->handler = element1;
911     return XML_ROLE_ELEMENT_NAME;
912   }
913   return common(state, tok);
914 }
915 
916 static int PTRCALL
element1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)917 element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
918          const ENCODING *enc) {
919   switch (tok) {
920   case XML_TOK_PROLOG_S:
921     return XML_ROLE_ELEMENT_NONE;
922   case XML_TOK_NAME:
923     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
924       state->handler = declClose;
925       state->role_none = XML_ROLE_ELEMENT_NONE;
926       return XML_ROLE_CONTENT_EMPTY;
927     }
928     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
929       state->handler = declClose;
930       state->role_none = XML_ROLE_ELEMENT_NONE;
931       return XML_ROLE_CONTENT_ANY;
932     }
933     break;
934   case XML_TOK_OPEN_PAREN:
935     state->handler = element2;
936     state->level = 1;
937     return XML_ROLE_GROUP_OPEN;
938   }
939   return common(state, tok);
940 }
941 
942 static int PTRCALL
element2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)943 element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
944          const ENCODING *enc) {
945   switch (tok) {
946   case XML_TOK_PROLOG_S:
947     return XML_ROLE_ELEMENT_NONE;
948   case XML_TOK_POUND_NAME:
949     if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end,
950                             KW_PCDATA)) {
951       state->handler = element3;
952       return XML_ROLE_CONTENT_PCDATA;
953     }
954     break;
955   case XML_TOK_OPEN_PAREN:
956     state->level = 2;
957     state->handler = element6;
958     return XML_ROLE_GROUP_OPEN;
959   case XML_TOK_NAME:
960   case XML_TOK_PREFIXED_NAME:
961     state->handler = element7;
962     return XML_ROLE_CONTENT_ELEMENT;
963   case XML_TOK_NAME_QUESTION:
964     state->handler = element7;
965     return XML_ROLE_CONTENT_ELEMENT_OPT;
966   case XML_TOK_NAME_ASTERISK:
967     state->handler = element7;
968     return XML_ROLE_CONTENT_ELEMENT_REP;
969   case XML_TOK_NAME_PLUS:
970     state->handler = element7;
971     return XML_ROLE_CONTENT_ELEMENT_PLUS;
972   }
973   return common(state, tok);
974 }
975 
976 static int PTRCALL
element3(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)977 element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
978          const ENCODING *enc) {
979   UNUSED_P(ptr);
980   UNUSED_P(end);
981   UNUSED_P(enc);
982   switch (tok) {
983   case XML_TOK_PROLOG_S:
984     return XML_ROLE_ELEMENT_NONE;
985   case XML_TOK_CLOSE_PAREN:
986     state->handler = declClose;
987     state->role_none = XML_ROLE_ELEMENT_NONE;
988     return XML_ROLE_GROUP_CLOSE;
989   case XML_TOK_CLOSE_PAREN_ASTERISK:
990     state->handler = declClose;
991     state->role_none = XML_ROLE_ELEMENT_NONE;
992     return XML_ROLE_GROUP_CLOSE_REP;
993   case XML_TOK_OR:
994     state->handler = element4;
995     return XML_ROLE_ELEMENT_NONE;
996   }
997   return common(state, tok);
998 }
999 
1000 static int PTRCALL
element4(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1001 element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1002          const ENCODING *enc) {
1003   UNUSED_P(ptr);
1004   UNUSED_P(end);
1005   UNUSED_P(enc);
1006   switch (tok) {
1007   case XML_TOK_PROLOG_S:
1008     return XML_ROLE_ELEMENT_NONE;
1009   case XML_TOK_NAME:
1010   case XML_TOK_PREFIXED_NAME:
1011     state->handler = element5;
1012     return XML_ROLE_CONTENT_ELEMENT;
1013   }
1014   return common(state, tok);
1015 }
1016 
1017 static int PTRCALL
element5(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1018 element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1019          const ENCODING *enc) {
1020   UNUSED_P(ptr);
1021   UNUSED_P(end);
1022   UNUSED_P(enc);
1023   switch (tok) {
1024   case XML_TOK_PROLOG_S:
1025     return XML_ROLE_ELEMENT_NONE;
1026   case XML_TOK_CLOSE_PAREN_ASTERISK:
1027     state->handler = declClose;
1028     state->role_none = XML_ROLE_ELEMENT_NONE;
1029     return XML_ROLE_GROUP_CLOSE_REP;
1030   case XML_TOK_OR:
1031     state->handler = element4;
1032     return XML_ROLE_ELEMENT_NONE;
1033   }
1034   return common(state, tok);
1035 }
1036 
1037 static int PTRCALL
element6(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1038 element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1039          const ENCODING *enc) {
1040   UNUSED_P(ptr);
1041   UNUSED_P(end);
1042   UNUSED_P(enc);
1043   switch (tok) {
1044   case XML_TOK_PROLOG_S:
1045     return XML_ROLE_ELEMENT_NONE;
1046   case XML_TOK_OPEN_PAREN:
1047     state->level += 1;
1048     return XML_ROLE_GROUP_OPEN;
1049   case XML_TOK_NAME:
1050   case XML_TOK_PREFIXED_NAME:
1051     state->handler = element7;
1052     return XML_ROLE_CONTENT_ELEMENT;
1053   case XML_TOK_NAME_QUESTION:
1054     state->handler = element7;
1055     return XML_ROLE_CONTENT_ELEMENT_OPT;
1056   case XML_TOK_NAME_ASTERISK:
1057     state->handler = element7;
1058     return XML_ROLE_CONTENT_ELEMENT_REP;
1059   case XML_TOK_NAME_PLUS:
1060     state->handler = element7;
1061     return XML_ROLE_CONTENT_ELEMENT_PLUS;
1062   }
1063   return common(state, tok);
1064 }
1065 
1066 static int PTRCALL
element7(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1067 element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1068          const ENCODING *enc) {
1069   UNUSED_P(ptr);
1070   UNUSED_P(end);
1071   UNUSED_P(enc);
1072   switch (tok) {
1073   case XML_TOK_PROLOG_S:
1074     return XML_ROLE_ELEMENT_NONE;
1075   case XML_TOK_CLOSE_PAREN:
1076     state->level -= 1;
1077     if (state->level == 0) {
1078       state->handler = declClose;
1079       state->role_none = XML_ROLE_ELEMENT_NONE;
1080     }
1081     return XML_ROLE_GROUP_CLOSE;
1082   case XML_TOK_CLOSE_PAREN_ASTERISK:
1083     state->level -= 1;
1084     if (state->level == 0) {
1085       state->handler = declClose;
1086       state->role_none = XML_ROLE_ELEMENT_NONE;
1087     }
1088     return XML_ROLE_GROUP_CLOSE_REP;
1089   case XML_TOK_CLOSE_PAREN_QUESTION:
1090     state->level -= 1;
1091     if (state->level == 0) {
1092       state->handler = declClose;
1093       state->role_none = XML_ROLE_ELEMENT_NONE;
1094     }
1095     return XML_ROLE_GROUP_CLOSE_OPT;
1096   case XML_TOK_CLOSE_PAREN_PLUS:
1097     state->level -= 1;
1098     if (state->level == 0) {
1099       state->handler = declClose;
1100       state->role_none = XML_ROLE_ELEMENT_NONE;
1101     }
1102     return XML_ROLE_GROUP_CLOSE_PLUS;
1103   case XML_TOK_COMMA:
1104     state->handler = element6;
1105     return XML_ROLE_GROUP_SEQUENCE;
1106   case XML_TOK_OR:
1107     state->handler = element6;
1108     return XML_ROLE_GROUP_CHOICE;
1109   }
1110   return common(state, tok);
1111 }
1112 
1113 #ifdef XML_DTD
1114 
1115 static int PTRCALL
condSect0(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1116 condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1117           const ENCODING *enc) {
1118   switch (tok) {
1119   case XML_TOK_PROLOG_S:
1120     return XML_ROLE_NONE;
1121   case XML_TOK_NAME:
1122     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1123       state->handler = condSect1;
1124       return XML_ROLE_NONE;
1125     }
1126     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1127       state->handler = condSect2;
1128       return XML_ROLE_NONE;
1129     }
1130     break;
1131   }
1132   return common(state, tok);
1133 }
1134 
1135 static int PTRCALL
condSect1(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1136 condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1137           const ENCODING *enc) {
1138   UNUSED_P(ptr);
1139   UNUSED_P(end);
1140   UNUSED_P(enc);
1141   switch (tok) {
1142   case XML_TOK_PROLOG_S:
1143     return XML_ROLE_NONE;
1144   case XML_TOK_OPEN_BRACKET:
1145     state->handler = externalSubset1;
1146     state->includeLevel += 1;
1147     return XML_ROLE_NONE;
1148   }
1149   return common(state, tok);
1150 }
1151 
1152 static int PTRCALL
condSect2(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1153 condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1154           const ENCODING *enc) {
1155   UNUSED_P(ptr);
1156   UNUSED_P(end);
1157   UNUSED_P(enc);
1158   switch (tok) {
1159   case XML_TOK_PROLOG_S:
1160     return XML_ROLE_NONE;
1161   case XML_TOK_OPEN_BRACKET:
1162     state->handler = externalSubset1;
1163     return XML_ROLE_IGNORE_SECT;
1164   }
1165   return common(state, tok);
1166 }
1167 
1168 #endif /* XML_DTD */
1169 
1170 static int PTRCALL
declClose(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1171 declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1172           const ENCODING *enc) {
1173   UNUSED_P(ptr);
1174   UNUSED_P(end);
1175   UNUSED_P(enc);
1176   switch (tok) {
1177   case XML_TOK_PROLOG_S:
1178     return state->role_none;
1179   case XML_TOK_DECL_CLOSE:
1180     setTopLevel(state);
1181     return state->role_none;
1182   }
1183   return common(state, tok);
1184 }
1185 
1186 /* This function will only be invoked if the internal logic of the
1187  * parser has broken down.  It is used in two cases:
1188  *
1189  * 1: When the XML prolog has been finished.  At this point the
1190  * processor (the parser level above these role handlers) should
1191  * switch from prologProcessor to contentProcessor and reinitialise
1192  * the handler function.
1193  *
1194  * 2: When an error has been detected (via common() below).  At this
1195  * point again the processor should be switched to errorProcessor,
1196  * which will never call a handler.
1197  *
1198  * The result of this is that error() can only be called if the
1199  * processor switch failed to happen, which is an internal error and
1200  * therefore we shouldn't be able to provoke it simply by using the
1201  * library.  It is a necessary backstop, however, so we merely exclude
1202  * it from the coverage statistics.
1203  *
1204  * LCOV_EXCL_START
1205  */
1206 static int PTRCALL
error(PROLOG_STATE * state,int tok,const char * ptr,const char * end,const ENCODING * enc)1207 error(PROLOG_STATE *state, int tok, const char *ptr, const char *end,
1208       const ENCODING *enc) {
1209   UNUSED_P(state);
1210   UNUSED_P(tok);
1211   UNUSED_P(ptr);
1212   UNUSED_P(end);
1213   UNUSED_P(enc);
1214   return XML_ROLE_NONE;
1215 }
1216 /* LCOV_EXCL_STOP */
1217 
1218 static int FASTCALL
common(PROLOG_STATE * state,int tok)1219 common(PROLOG_STATE *state, int tok) {
1220 #ifdef XML_DTD
1221   if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1222     return XML_ROLE_INNER_PARAM_ENTITY_REF;
1223 #else
1224   UNUSED_P(tok);
1225 #endif
1226   state->handler = error;
1227   return XML_ROLE_ERROR;
1228 }
1229 
1230 void
XmlPrologStateInit(PROLOG_STATE * state)1231 XmlPrologStateInit(PROLOG_STATE *state) {
1232   state->handler = prolog0;
1233 #ifdef XML_DTD
1234   state->documentEntity = 1;
1235   state->includeLevel = 0;
1236   state->inEntityValue = 0;
1237 #endif /* XML_DTD */
1238 }
1239 
1240 #ifdef XML_DTD
1241 
1242 void
XmlPrologStateInitExternalEntity(PROLOG_STATE * state)1243 XmlPrologStateInitExternalEntity(PROLOG_STATE *state) {
1244   state->handler = externalSubset0;
1245   state->documentEntity = 0;
1246   state->includeLevel = 0;
1247 }
1248 
1249 #endif /* XML_DTD */
1250