1 #include "Python.h"
2 #include <ctype.h>
3 
4 #include "frameobject.h"
5 #include "expat.h"
6 
7 #include "pyexpat.h"
8 
/* Expat's version folded into one integer (major*10000 + minor*100 + micro)
 * so that it can be compared in preprocessor conditionals.
 */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Fallback definitions of the PyDoc_* macros; they are only used when the
 * Python headers did not already define PyDoc_STRVAR.
 */
#ifndef PyDoc_STRVAR

/*
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)         str
#define PyDoc_VAR(name)        static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif

#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* In Python 2.0 and  2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE
#else
/* FIX_TRACE enables the trace/profile hook code further down this file. */
#define FIX_TRACE
#endif
31 
/* Slot numbers for the callbacks this module supports.  Each value is used
 * as an index into a parser's `handlers` array and into `handler_info`.
 * NOTE(review): handler_info is populated outside this excerpt; confirm
 * the table order before adding or reordering enumerators.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,

    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,

    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,

    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only compiled in for Expat 1.95.4 and newer. */
    SkippedEntity,
#endif
    /* Trailing enumerator; presumably here so the list ends without a
       dangling comma whether or not SkippedEntity is compiled in. */
    _DummyDecl
};
59 
60 static PyObject *ErrorObject;
61 
62 /* ----------------------------------------------------- */
63 
64 /* Declarations for objects of type xmlparser */
65 
66 typedef struct {
67     PyObject_HEAD
68 
69     XML_Parser itself;
70     int returns_unicode;        /* True if Unicode strings are returned;
71                                    if false, UTF-8 strings are returned */
72     int ordered_attributes;     /* Return attributes as a list. */
73     int specified_attributes;   /* Report only specified attributes. */
74     int in_callback;            /* Is a callback active? */
75     int ns_prefixes;            /* Namespace-triplets mode? */
76     XML_Char *buffer;           /* Buffer used when accumulating characters */
77                                 /* NULL if not enabled */
78     int buffer_size;            /* Size of buffer, in XML_Char units */
79     int buffer_used;            /* Buffer units in use */
80     PyObject *intern;           /* Dictionary to intern strings */
81     PyObject **handlers;
82 } xmlparseobject;
83 
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
85 
86 static PyTypeObject Xmlparsetype;
87 
88 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
89 typedef void* xmlhandler;
90 
91 struct HandlerInfo {
92     const char *name;
93     xmlhandlersetter setter;
94     xmlhandler handler;
95     PyCodeObject *tb_code;
96     PyObject *nameobj;
97 };
98 
99 static struct HandlerInfo handler_info[64];
100 
101 /* Set an integer attribute on the error object; return true on success,
102  * false on an exception.
103  */
104 static int
set_error_attr(PyObject * err,char * name,int value)105 set_error_attr(PyObject *err, char *name, int value)
106 {
107     PyObject *v = PyInt_FromLong(value);
108 
109     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110         Py_XDECREF(v);
111         return 0;
112     }
113     Py_DECREF(v);
114     return 1;
115 }
116 
117 /* Build and set an Expat exception, including positioning
118  * information.  Always returns NULL.
119  */
120 static PyObject *
set_error(xmlparseobject * self,enum XML_Error code)121 set_error(xmlparseobject *self, enum XML_Error code)
122 {
123     PyObject *err;
124     char buffer[256];
125     XML_Parser parser = self->itself;
126     int lineno = XML_GetErrorLineNumber(parser);
127     int column = XML_GetErrorColumnNumber(parser);
128 
129     /* There is no risk of overflowing this buffer, since
130        even for 64-bit integers, there is sufficient space. */
131     sprintf(buffer, "%.200s: line %i, column %i",
132             XML_ErrorString(code), lineno, column);
133     err = PyObject_CallFunction(ErrorObject, "s", buffer);
134     if (  err != NULL
135           && set_error_attr(err, "code", code)
136           && set_error_attr(err, "offset", column)
137           && set_error_attr(err, "lineno", lineno)) {
138         PyErr_SetObject(ErrorObject, err);
139     }
140     Py_XDECREF(err);
141     return NULL;
142 }
143 
144 static int
have_handler(xmlparseobject * self,int type)145 have_handler(xmlparseobject *self, int type)
146 {
147     PyObject *handler = self->handlers[type];
148     return handler != NULL;
149 }
150 
151 static PyObject *
get_handler_name(struct HandlerInfo * hinfo)152 get_handler_name(struct HandlerInfo *hinfo)
153 {
154     PyObject *name = hinfo->nameobj;
155     if (name == NULL) {
156         name = PyString_FromString(hinfo->name);
157         hinfo->nameobj = name;
158     }
159     Py_XINCREF(name);
160     return name;
161 }
162 
163 
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166    Returns None if str is a null pointer. */
167 
168 static PyObject *
conv_string_to_unicode(const XML_Char * str)169 conv_string_to_unicode(const XML_Char *str)
170 {
171     /* XXX currently this code assumes that XML_Char is 8-bit,
172        and hence in UTF-8.  */
173     /* UTF-8 from Expat, Unicode desired */
174     if (str == NULL) {
175         Py_INCREF(Py_None);
176         return Py_None;
177     }
178     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
179 }
180 
181 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)182 conv_string_len_to_unicode(const XML_Char *str, int len)
183 {
184     /* XXX currently this code assumes that XML_Char is 8-bit,
185        and hence in UTF-8.  */
186     /* UTF-8 from Expat, Unicode desired */
187     if (str == NULL) {
188         Py_INCREF(Py_None);
189         return Py_None;
190     }
191     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
192 }
193 #endif
194 
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196    Returns None if str is a null pointer. */
197 
198 static PyObject *
conv_string_to_utf8(const XML_Char * str)199 conv_string_to_utf8(const XML_Char *str)
200 {
201     /* XXX currently this code assumes that XML_Char is 8-bit,
202        and hence in UTF-8.  */
203     /* UTF-8 from Expat, UTF-8 desired */
204     if (str == NULL) {
205         Py_INCREF(Py_None);
206         return Py_None;
207     }
208     return PyString_FromString(str);
209 }
210 
211 static PyObject *
conv_string_len_to_utf8(const XML_Char * str,int len)212 conv_string_len_to_utf8(const XML_Char *str, int len)
213 {
214     /* XXX currently this code assumes that XML_Char is 8-bit,
215        and hence in UTF-8.  */
216     /* UTF-8 from Expat, UTF-8 desired */
217     if (str == NULL) {
218         Py_INCREF(Py_None);
219         return Py_None;
220     }
221     return PyString_FromStringAndSize((const char *)str, len);
222 }
223 
224 /* Callback routines */
225 
226 static void clear_handlers(xmlparseobject *self, int initial);
227 
228 /* This handler is used when an error has been detected, in the hope
229    that actual parsing can be terminated early.  This will only help
230    if an external entity reference is encountered. */
231 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)232 error_external_entity_ref_handler(XML_Parser parser,
233                                   const XML_Char *context,
234                                   const XML_Char *base,
235                                   const XML_Char *systemId,
236                                   const XML_Char *publicId)
237 {
238     return 0;
239 }
240 
241 /* Dummy character data handler used when an error (exception) has
242    been detected, and the actual parsing can be terminated early.
243    This is needed since character data handler can't be safely removed
244    from within the character data handler, but can be replaced.  It is
245    used only from the character data handler trampoline, and must be
246    used right after `flag_error()` is called. */
247 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)248 noop_character_data_handler(void *userData, const XML_Char *data, int len)
249 {
250     /* Do nothing. */
251 }
252 
253 static void
flag_error(xmlparseobject * self)254 flag_error(xmlparseobject *self)
255 {
256     clear_handlers(self, 0);
257     XML_SetExternalEntityRefHandler(self->itself,
258                                     error_external_entity_ref_handler);
259 }
260 
261 static PyCodeObject*
getcode(enum HandlerTypes slot,char * func_name,int lineno)262 getcode(enum HandlerTypes slot, char* func_name, int lineno)
263 {
264     if (handler_info[slot].tb_code == NULL) {
265         handler_info[slot].tb_code =
266             PyCode_NewEmpty(__FILE__, func_name, lineno);
267     }
268     return handler_info[slot].tb_code;
269 }
270 
271 #ifdef FIX_TRACE
272 static int
trace_frame(PyThreadState * tstate,PyFrameObject * f,int code,PyObject * val)273 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
274 {
275     int result = 0;
276     if (!tstate->use_tracing || tstate->tracing)
277         return 0;
278     if (tstate->c_profilefunc != NULL) {
279         tstate->tracing++;
280         result = tstate->c_profilefunc(tstate->c_profileobj,
281                                        f, code , val);
282         tstate->use_tracing = ((tstate->c_tracefunc != NULL)
283                                || (tstate->c_profilefunc != NULL));
284         tstate->tracing--;
285         if (result)
286             return result;
287     }
288     if (tstate->c_tracefunc != NULL) {
289         tstate->tracing++;
290         result = tstate->c_tracefunc(tstate->c_traceobj,
291                                      f, code , val);
292         tstate->use_tracing = ((tstate->c_tracefunc != NULL)
293                                || (tstate->c_profilefunc != NULL));
294         tstate->tracing--;
295     }
296     return result;
297 }
298 
299 static int
trace_frame_exc(PyThreadState * tstate,PyFrameObject * f)300 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
301 {
302     PyObject *type, *value, *traceback, *arg;
303     int err;
304 
305     if (tstate->c_tracefunc == NULL)
306         return 0;
307 
308     PyErr_Fetch(&type, &value, &traceback);
309     if (value == NULL) {
310         value = Py_None;
311         Py_INCREF(value);
312     }
313 #if PY_VERSION_HEX < 0x02040000
314     arg = Py_BuildValue("(OOO)", type, value, traceback);
315 #else
316     arg = PyTuple_Pack(3, type, value, traceback);
317 #endif
318     if (arg == NULL) {
319         PyErr_Restore(type, value, traceback);
320         return 0;
321     }
322     err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
323     Py_DECREF(arg);
324     if (err == 0)
325         PyErr_Restore(type, value, traceback);
326     else {
327         Py_XDECREF(type);
328         Py_XDECREF(value);
329         Py_XDECREF(traceback);
330     }
331     return err;
332 }
333 #endif
334 
335 static PyObject*
call_with_frame(PyCodeObject * c,PyObject * func,PyObject * args,xmlparseobject * self)336 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
337                 xmlparseobject *self)
338 {
339     PyThreadState *tstate = PyThreadState_GET();
340     PyFrameObject *f;
341     PyObject *res;
342 
343     if (c == NULL)
344         return NULL;
345 
346     f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
347     if (f == NULL)
348         return NULL;
349     tstate->frame = f;
350 #ifdef FIX_TRACE
351     if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
352         return NULL;
353     }
354 #endif
355     res = PyEval_CallObject(func, args);
356     if (res == NULL) {
357         if (tstate->curexc_traceback == NULL)
358             PyTraceBack_Here(f);
359         XML_StopParser(self->itself, XML_FALSE);
360 #ifdef FIX_TRACE
361         if (trace_frame_exc(tstate, f) < 0) {
362             return NULL;
363         }
364     }
365     else {
366         if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
367             Py_XDECREF(res);
368             res = NULL;
369         }
370     }
371 #else
372     }
373 #endif
374     tstate->frame = f->f_back;
375     Py_DECREF(f);
376     return res;
377 }
378 
/* STRING_CONV_FUNC names the converter for NUL-terminated XML_Char strings.
   In Unicode builds the expansion reads `self->returns_unicode`, so a
   variable named `self` must be in scope wherever the macro is used. */
#ifndef Py_USING_UNICODE
#define STRING_CONV_FUNC conv_string_to_utf8
#else
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#endif
386 
387 static PyObject*
string_intern(xmlparseobject * self,const char * str)388 string_intern(xmlparseobject *self, const char* str)
389 {
390     PyObject *result = STRING_CONV_FUNC(str);
391     PyObject *value;
392     /* result can be NULL if the unicode conversion failed. */
393     if (!result)
394         return result;
395     if (!self->intern)
396         return result;
397     value = PyDict_GetItem(self->intern, result);
398     if (!value) {
399         if (PyDict_SetItem(self->intern, result, result) == 0)
400             return result;
401         else
402             return NULL;
403     }
404     Py_INCREF(value);
405     Py_DECREF(result);
406     return value;
407 }
408 
409 /* Return 0 on success, -1 on exception.
410  * flag_error() will be called before return if needed.
411  */
412 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)413 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
414 {
415     PyObject *args;
416     PyObject *temp;
417 
418     if (!have_handler(self, CharacterData))
419         return -1;
420 
421     args = PyTuple_New(1);
422     if (args == NULL)
423         return -1;
424 #ifdef Py_USING_UNICODE
425     temp = (self->returns_unicode
426             ? conv_string_len_to_unicode(buffer, len)
427             : conv_string_len_to_utf8(buffer, len));
428 #else
429     temp = conv_string_len_to_utf8(buffer, len);
430 #endif
431     if (temp == NULL) {
432         Py_DECREF(args);
433         flag_error(self);
434         XML_SetCharacterDataHandler(self->itself,
435                                     noop_character_data_handler);
436         return -1;
437     }
438     PyTuple_SET_ITEM(args, 0, temp);
439     /* temp is now a borrowed reference; consider it unused. */
440     self->in_callback = 1;
441     temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
442                            self->handlers[CharacterData], args, self);
443     /* temp is an owned reference again, or NULL */
444     self->in_callback = 0;
445     Py_DECREF(args);
446     if (temp == NULL) {
447         flag_error(self);
448         XML_SetCharacterDataHandler(self->itself,
449                                     noop_character_data_handler);
450         return -1;
451     }
452     Py_DECREF(temp);
453     return 0;
454 }
455 
456 static int
flush_character_buffer(xmlparseobject * self)457 flush_character_buffer(xmlparseobject *self)
458 {
459     int rc;
460     if (self->buffer == NULL || self->buffer_used == 0)
461         return 0;
462     rc = call_character_handler(self, self->buffer, self->buffer_used);
463     self->buffer_used = 0;
464     return rc;
465 }
466 
467 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)468 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
469 {
470     xmlparseobject *self = (xmlparseobject *) userData;
471     if (self->buffer == NULL)
472         call_character_handler(self, data, len);
473     else {
474         if ((self->buffer_used + len) > self->buffer_size) {
475             if (flush_character_buffer(self) < 0)
476                 return;
477             /* handler might have changed; drop the rest on the floor
478              * if there isn't a handler anymore
479              */
480             if (!have_handler(self, CharacterData))
481                 return;
482         }
483         if (len > self->buffer_size) {
484             call_character_handler(self, data, len);
485             self->buffer_used = 0;
486         }
487         else {
488             memcpy(self->buffer + self->buffer_used,
489                    data, len * sizeof(XML_Char));
490             self->buffer_used += len;
491         }
492     }
493 }
494 
495 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])496 my_StartElementHandler(void *userData,
497                        const XML_Char *name, const XML_Char *atts[])
498 {
499     xmlparseobject *self = (xmlparseobject *)userData;
500 
501     if (have_handler(self, StartElement)) {
502         PyObject *container, *rv, *args;
503         int i, max;
504 
505         if (flush_character_buffer(self) < 0)
506             return;
507         /* Set max to the number of slots filled in atts[]; max/2 is
508          * the number of attributes we need to process.
509          */
510         if (self->specified_attributes) {
511             max = XML_GetSpecifiedAttributeCount(self->itself);
512         }
513         else {
514             max = 0;
515             while (atts[max] != NULL)
516                 max += 2;
517         }
518         /* Build the container. */
519         if (self->ordered_attributes)
520             container = PyList_New(max);
521         else
522             container = PyDict_New();
523         if (container == NULL) {
524             flag_error(self);
525             return;
526         }
527         for (i = 0; i < max; i += 2) {
528             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
529             PyObject *v;
530             if (n == NULL) {
531                 flag_error(self);
532                 Py_DECREF(container);
533                 return;
534             }
535             v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
536             if (v == NULL) {
537                 flag_error(self);
538                 Py_DECREF(container);
539                 Py_DECREF(n);
540                 return;
541             }
542             if (self->ordered_attributes) {
543                 PyList_SET_ITEM(container, i, n);
544                 PyList_SET_ITEM(container, i+1, v);
545             }
546             else if (PyDict_SetItem(container, n, v)) {
547                 flag_error(self);
548                 Py_DECREF(n);
549                 Py_DECREF(v);
550                 return;
551             }
552             else {
553                 Py_DECREF(n);
554                 Py_DECREF(v);
555             }
556         }
557         args = string_intern(self, name);
558         if (args != NULL)
559             args = Py_BuildValue("(NN)", args, container);
560         if (args == NULL) {
561             Py_DECREF(container);
562             return;
563         }
564         /* Container is now a borrowed reference; ignore it. */
565         self->in_callback = 1;
566         rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
567                              self->handlers[StartElement], args, self);
568         self->in_callback = 0;
569         Py_DECREF(args);
570         if (rv == NULL) {
571             flag_error(self);
572             return;
573         }
574         Py_DECREF(rv);
575     }
576 }
577 
/* RC_HANDLER generates the C callback `my_<NAME>Handler`, which forwards an
 * Expat event to the Python handler stored in slot NAME.
 *
 *   RC            C return type of the generated function
 *   NAME          enum HandlerTypes member; also pasted into the function
 *                 name and used as the name of the traceback code object
 *   PARAMS        parenthesised C parameter list
 *   INIT          extra local declarations (e.g. `int rc=0;`)
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue; it is
 *                 only evaluated when a handler is installed
 *   CONVERSION    statement(s) run on success, while `rv` is still valid
 *   RETURN        expression returned on every path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is installed.
 * Otherwise it flushes buffered character data, builds the arguments, and
 * calls the handler through call_with_frame() with in_callback set.  If
 * building the arguments or the call fails, flag_error() is called.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
606 
/* VOID_HANDLER: RC_HANDLER for callbacks that return nothing and receive
   the parser object as `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* INT_HANDLER: RC_HANDLER for callbacks that return an int to Expat.  The
   result starts at 0 and, when the Python handler succeeds, is replaced by
   PyInt_AsLong() of its return value. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyInt_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
615 
/* my_EndElementHandler: passes the interned element name. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* my_ProcessingInstructionHandler: passes (target, data); the target is
   interned, the data is converted with STRING_CONV_FUNC. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))

/* my_UnparsedEntityDeclHandler: passes five interned strings
   (entityName, base, systemId, publicId, notationName). */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_EntityDeclHandler: the entity value arrives with an explicit length,
   so it is converted with the length-taking converters; the Unicode build
   picks the converter from self->returns_unicode.  All other strings are
   interned. */
#ifndef Py_USING_UNICODE
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              conv_string_len_to_utf8(value, value_length),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))
#else
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (self->returns_unicode
               ? conv_string_len_to_unicode(value, value_length)
               : conv_string_len_to_utf8(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))
#endif

/* my_XmlDeclHandler: passes (version, encoding, standalone); both strings
   are converted with STRING_CONV_FUNC and are not interned. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
              standalone))
684 
685 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))686 conv_content_model(XML_Content * const model,
687                    PyObject *(*conv_string)(const XML_Char *))
688 {
689     PyObject *result = NULL;
690     PyObject *children = PyTuple_New(model->numchildren);
691     int i;
692 
693     if (children != NULL) {
694         assert(model->numchildren < INT_MAX);
695         for (i = 0; i < (int)model->numchildren; ++i) {
696             PyObject *child = conv_content_model(&model->children[i],
697                                                  conv_string);
698             if (child == NULL) {
699                 Py_XDECREF(children);
700                 return NULL;
701             }
702             PyTuple_SET_ITEM(children, i, child);
703         }
704         result = Py_BuildValue("(iiO&N)",
705                                model->type, model->quant,
706                                conv_string,model->name, children);
707     }
708     return result;
709 }
710 
711 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)712 my_ElementDeclHandler(void *userData,
713                       const XML_Char *name,
714                       XML_Content *model)
715 {
716     xmlparseobject *self = (xmlparseobject *)userData;
717     PyObject *args = NULL;
718 
719     if (have_handler(self, ElementDecl)) {
720         PyObject *rv = NULL;
721         PyObject *modelobj, *nameobj;
722 
723         if (flush_character_buffer(self) < 0)
724             goto finally;
725 #ifdef Py_USING_UNICODE
726         modelobj = conv_content_model(model,
727                                       (self->returns_unicode
728                                        ? conv_string_to_unicode
729                                        : conv_string_to_utf8));
730 #else
731         modelobj = conv_content_model(model, conv_string_to_utf8);
732 #endif
733         if (modelobj == NULL) {
734             flag_error(self);
735             goto finally;
736         }
737         nameobj = string_intern(self, name);
738         if (nameobj == NULL) {
739             Py_DECREF(modelobj);
740             flag_error(self);
741             goto finally;
742         }
743         args = Py_BuildValue("NN", nameobj, modelobj);
744         if (args == NULL) {
745             Py_DECREF(modelobj);
746             flag_error(self);
747             goto finally;
748         }
749         self->in_callback = 1;
750         rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
751                              self->handlers[ElementDecl], args, self);
752         self->in_callback = 0;
753         if (rv == NULL) {
754             flag_error(self);
755             goto finally;
756         }
757         Py_DECREF(rv);
758     }
759  finally:
760     Py_XDECREF(args);
761     XML_FreeContentModel(self->itself, model);
762     return;
763 }
764 
/* my_AttlistDeclHandler: passes (elname, attname, att_type, dflt,
   isrequired); the two names are interned, the type and default are
   converted with STRING_CONV_FUNC. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
              isrequired))

/* my_SkippedEntityHandler: only compiled for Expat 1.95.4 and newer. */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* my_NotationDeclHandler: passes four interned strings. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* my_StartNamespaceDeclHandler: passes the interned (prefix, uri). */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* my_EndNamespaceDeclHandler: passes the interned prefix. */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* my_CommentHandler: passes the comment text, converted but not interned. */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", STRING_CONV_FUNC,data))

/* my_StartCdataSectionHandler / my_EndCdataSectionHandler: the Python
   handlers are called with no arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* my_DefaultHandler / my_DefaultHandlerExpandHandler: pass the `len`
   XML_Chars at `s` as one string; the Unicode build picks the converter
   from self->returns_unicode. */
#ifndef Py_USING_UNICODE
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", conv_string_len_to_utf8(s,len)))

VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", conv_string_len_to_utf8(s,len)))
#else
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (self->returns_unicode
                       ? conv_string_len_to_unicode(s,len)
                       : conv_string_len_to_utf8(s,len))))

VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (self->returns_unicode
                       ? conv_string_len_to_unicode(s,len)
                       : conv_string_len_to_utf8(s,len))))
#endif

/* my_NotStandaloneHandler: no arguments; the Python handler's integer
   result is returned to Expat (0 if no handler is installed or on error). */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* my_ExternalEntityRefHandler: uses RC_HANDLER directly because this
   callback receives the XML_Parser rather than the user data pointer, so
   the parser object is fetched with XML_GetUserData(). */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 STRING_CONV_FUNC,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyInt_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* my_StartDoctypeDeclHandler: passes (doctypeName, sysid, pubid,
   has_internal_subset); the three strings are interned. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* my_EndDoctypeDeclHandler: the Python handler is called with no
   arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
870 
871 /* ---------------------------------------------------------------- */
872 
873 static PyObject *
get_parse_result(xmlparseobject * self,int rv)874 get_parse_result(xmlparseobject *self, int rv)
875 {
876     if (PyErr_Occurred()) {
877         return NULL;
878     }
879     if (rv == 0) {
880         return set_error(self, XML_GetErrorCode(self->itself));
881     }
882     if (flush_character_buffer(self) < 0) {
883         return NULL;
884     }
885     return PyInt_FromLong(rv);
886 }
887 
888 PyDoc_STRVAR(xmlparse_Parse__doc__,
889 "Parse(data[, isfinal])\n\
890 Parse XML data.  `isfinal' should be true at end of input.");
891 
892 static PyObject *
xmlparse_Parse(xmlparseobject * self,PyObject * args)893 xmlparse_Parse(xmlparseobject *self, PyObject *args)
894 {
895     char *s;
896     int slen;
897     int isFinal = 0;
898 
899     if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
900         return NULL;
901 
902     return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
903 }
904 
905 /* File reading copied from cPickle */
906 
907 #define BUF_SIZE 2048
908 
909 static int
readinst(char * buf,int buf_size,PyObject * meth)910 readinst(char *buf, int buf_size, PyObject *meth)
911 {
912     PyObject *arg = NULL;
913     PyObject *bytes = NULL;
914     PyObject *str = NULL;
915     int len = -1;
916 
917     if ((bytes = PyInt_FromLong(buf_size)) == NULL)
918         goto finally;
919 
920     if ((arg = PyTuple_New(1)) == NULL) {
921         Py_DECREF(bytes);
922         goto finally;
923     }
924 
925     PyTuple_SET_ITEM(arg, 0, bytes);
926 
927 #if PY_VERSION_HEX < 0x02020000
928     str = PyObject_CallObject(meth, arg);
929 #else
930     str = PyObject_Call(meth, arg, NULL);
931 #endif
932     if (str == NULL)
933         goto finally;
934 
935     /* XXX what to do if it returns a Unicode string? */
936     if (!PyString_Check(str)) {
937         PyErr_Format(PyExc_TypeError,
938                      "read() did not return a string object (type=%.400s)",
939                      Py_TYPE(str)->tp_name);
940         goto finally;
941     }
942     len = PyString_GET_SIZE(str);
943     if (len > buf_size) {
944         PyErr_Format(PyExc_ValueError,
945                      "read() returned too much data: "
946                      "%i bytes requested, %i returned",
947                      buf_size, len);
948         goto finally;
949     }
950     memcpy(buf, PyString_AsString(str), len);
951 finally:
952     Py_XDECREF(arg);
953     Py_XDECREF(str);
954     return len;
955 }
956 
957 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
958 "ParseFile(file)\n\
959 Parse XML data from file-like object.");
960 
961 static PyObject *
xmlparse_ParseFile(xmlparseobject * self,PyObject * f)962 xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
963 {
964     int rv = 1;
965     PyObject *readmethod = NULL;
966 
967     readmethod = PyObject_GetAttrString(f, "read");
968     if (readmethod == NULL) {
969         PyErr_SetString(PyExc_TypeError,
970                         "argument must have 'read' attribute");
971         return NULL;
972 
973     }
974     for (;;) {
975         int bytes_read;
976         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
977         if (buf == NULL) {
978             Py_XDECREF(readmethod);
979             return get_parse_result(self, 0);
980         }
981 
982         bytes_read = readinst(buf, BUF_SIZE, readmethod);
983         if (bytes_read < 0) {
984             Py_XDECREF(readmethod);
985             return NULL;
986         }
987 
988         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
989         if (PyErr_Occurred()) {
990             Py_XDECREF(readmethod);
991             return NULL;
992         }
993 
994         if (!rv || bytes_read == 0)
995             break;
996     }
997     Py_XDECREF(readmethod);
998     return get_parse_result(self, rv);
999 }
1000 
1001 PyDoc_STRVAR(xmlparse_SetBase__doc__,
1002 "SetBase(base_url)\n\
1003 Set the base URL for the parser.");
1004 
1005 static PyObject *
xmlparse_SetBase(xmlparseobject * self,PyObject * args)1006 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1007 {
1008     char *base;
1009 
1010     if (!PyArg_ParseTuple(args, "s:SetBase", &base))
1011         return NULL;
1012     if (!XML_SetBase(self->itself, base)) {
1013         return PyErr_NoMemory();
1014     }
1015     Py_INCREF(Py_None);
1016     return Py_None;
1017 }
1018 
1019 PyDoc_STRVAR(xmlparse_GetBase__doc__,
1020 "GetBase() -> url\n\
1021 Return base URL string for the parser.");
1022 
1023 static PyObject *
xmlparse_GetBase(xmlparseobject * self,PyObject * unused)1024 xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
1025 {
1026     return Py_BuildValue("z", XML_GetBase(self->itself));
1027 }
1028 
1029 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
1030 "GetInputContext() -> string\n\
1031 Return the untranslated text of the input that caused the current event.\n\
1032 If the event was generated by a large amount of text (such as a start tag\n\
1033 for an element with many attributes), not all of the text may be available.");
1034 
1035 static PyObject *
xmlparse_GetInputContext(xmlparseobject * self,PyObject * unused)1036 xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
1037 {
1038     if (self->in_callback) {
1039         int offset, size;
1040         const char *buffer
1041             = XML_GetInputContext(self->itself, &offset, &size);
1042 
1043         if (buffer != NULL)
1044             return PyString_FromStringAndSize(buffer + offset,
1045                                               size - offset);
1046         else
1047             Py_RETURN_NONE;
1048     }
1049     else
1050         Py_RETURN_NONE;
1051 }
1052 
PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
"ExternalEntityParserCreate(context[, encoding])\n\
Create a parser for parsing an external entity based on the\n\
information passed to the ExternalEntityRefHandler.");

/* ExternalEntityParserCreate(context[, encoding]): build a new xmlparser
   object around XML_ExternalEntityParserCreate(), copying the settings
   (buffer size, unicode/attribute flags, namespace-prefix flag, intern
   dictionary) and the installed handlers from `self`.

   Returns a new reference, or NULL with MemoryError set when an allocation
   or the Expat call fails (a failing object allocation returns NULL
   directly). */
static PyObject *
xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
{
    char *context;
    char *encoding = NULL;
    xmlparseobject *new_parser;
    int i;

    /* "z" lets `context` be None (NULL); `encoding` is optional. */
    if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
                          &context, &encoding)) {
        return NULL;
    }

#ifndef Py_TPFLAGS_HAVE_GC
    /* Python versions 2.0 and 2.1 */
    new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
#else
    /* Python versions 2.2 and later */
    new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
#endif

    if (new_parser == NULL)
        return NULL;
    new_parser->buffer_size = self->buffer_size;
    new_parser->buffer_used = 0;
    /* The child only gets a character buffer when the parent has one. */
    if (self->buffer != NULL) {
        new_parser->buffer = malloc(new_parser->buffer_size);
        if (new_parser->buffer == NULL) {
            /* The object is not fully initialized yet, so it is released
               directly instead of going through xmlparse_dealloc(). */
#ifndef Py_TPFLAGS_HAVE_GC
            /* Code for versions 2.0 and 2.1 */
            PyObject_Del(new_parser);
#else
            /* Code for versions 2.2 and later. */
            PyObject_GC_Del(new_parser);
#endif
            return PyErr_NoMemory();
        }
    }
    else
        new_parser->buffer = NULL;
    new_parser->returns_unicode = self->returns_unicode;
    new_parser->ordered_attributes = self->ordered_attributes;
    new_parser->specified_attributes = self->specified_attributes;
    new_parser->in_callback = 0;
    new_parser->ns_prefixes = self->ns_prefixes;
    new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
                                                        encoding);
    new_parser->handlers = 0;
    new_parser->intern = self->intern;
    Py_XINCREF(new_parser->intern);
    /* From here on `itself`, `handlers`, `buffer` and `intern` all hold
       defined values, so the Py_DECREF() calls below can run the
       deallocator. */
#ifdef Py_TPFLAGS_HAVE_GC
    PyObject_GC_Track(new_parser);
#else
    PyObject_GC_Init(new_parser);
#endif

    if (!new_parser->itself) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }

    XML_SetUserData(new_parser->itself, (void *)new_parser);

    /* allocate and clear handlers first */
    /* Count the entries of handler_info up to its NULL-name sentinel. */
    for (i = 0; handler_info[i].name != NULL; i++)
        /* do nothing */;

    new_parser->handlers = malloc(sizeof(PyObject *) * i);
    if (!new_parser->handlers) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }
    /* clear_handlers() is defined elsewhere in this file; with a second
       argument of 1 it presumably initializes the freshly allocated slots
       -- TODO confirm. */
    clear_handlers(new_parser, 1);

    /* then copy handlers from self */
    for (i = 0; handler_info[i].name != NULL; i++) {
        PyObject *handler = self->handlers[i];
        if (handler != NULL) {
            Py_INCREF(handler);
            new_parser->handlers[i] = handler;
            /* Install the matching C callback on the new Expat parser. */
            handler_info[i].setter(new_parser->itself,
                                   handler_info[i].handler);
        }
    }
    return (PyObject *)new_parser;
}
1144 
1145 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1146 "SetParamEntityParsing(flag) -> success\n\
1147 Controls parsing of parameter entities (including the external DTD\n\
1148 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1149 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1150 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1151 was successful.");
1152 
1153 static PyObject*
xmlparse_SetParamEntityParsing(xmlparseobject * p,PyObject * args)1154 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1155 {
1156     int flag;
1157     if (!PyArg_ParseTuple(args, "i", &flag))
1158         return NULL;
1159     flag = XML_SetParamEntityParsing(p->itself, flag);
1160     return PyInt_FromLong(flag);
1161 }
1162 
1163 
1164 #if XML_COMBINED_VERSION >= 19505
1165 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1166 "UseForeignDTD([flag])\n\
1167 Allows the application to provide an artificial external subset if one is\n\
1168 not specified as part of the document instance.  This readily allows the\n\
1169 use of a 'default' document type controlled by the application, while still\n\
1170 getting the advantage of providing document type information to the parser.\n\
1171 'flag' defaults to True if not provided.");
1172 
1173 static PyObject *
xmlparse_UseForeignDTD(xmlparseobject * self,PyObject * args)1174 xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1175 {
1176     PyObject *flagobj = NULL;
1177     int flag = 1;
1178     enum XML_Error rc;
1179     if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
1180         return NULL;
1181     if (flagobj != NULL) {
1182         flag = PyObject_IsTrue(flagobj);
1183         if (flag < 0)
1184             return NULL;
1185     }
1186     rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1187     if (rc != XML_ERROR_NONE) {
1188         return set_error(self, rc);
1189     }
1190     Py_INCREF(Py_None);
1191     return Py_None;
1192 }
1193 #endif
1194 
/* Method table for xmlparser objects; xmlparse_getattr() falls back to it
   through Py_FindMethod() for names that are not handled as attributes.
   ParseFile takes exactly one argument (METH_O); GetBase and
   GetInputContext take none (METH_NOARGS); the rest parse an argument
   tuple.  UseForeignDTD is only present when Expat is at least 1.95.5. */
static struct PyMethodDef xmlparse_methods[] = {
    {"Parse",     (PyCFunction)xmlparse_Parse,
                  METH_VARARGS, xmlparse_Parse__doc__},
    {"ParseFile", (PyCFunction)xmlparse_ParseFile,
                  METH_O,       xmlparse_ParseFile__doc__},
    {"SetBase",   (PyCFunction)xmlparse_SetBase,
                  METH_VARARGS, xmlparse_SetBase__doc__},
    {"GetBase",   (PyCFunction)xmlparse_GetBase,
                  METH_NOARGS, xmlparse_GetBase__doc__},
    {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
                  METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
    {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
                  METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
    {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
                  METH_NOARGS, xmlparse_GetInputContext__doc__},
#if XML_COMBINED_VERSION >= 19505
    {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
                  METH_VARARGS, xmlparse_UseForeignDTD__doc__},
#endif
    {NULL,        NULL}         /* sentinel */
};
1216 
1217 /* ---------- */
1218 
1219 
1220 #ifdef Py_USING_UNICODE
1221 
1222 /* pyexpat international encoding support.
1223    Make it as simple as possible.
1224 */
1225 
1226 static char template_buffer[257];
1227 PyObject *template_string = NULL;
1228 
1229 static void
init_template_buffer(void)1230 init_template_buffer(void)
1231 {
1232     int i;
1233     for (i = 0; i < 256; i++) {
1234         template_buffer[i] = i;
1235     }
1236     template_buffer[256] = 0;
1237 }
1238 
1239 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1240 PyUnknownEncodingHandler(void *encodingHandlerData,
1241                          const XML_Char *name,
1242                          XML_Encoding *info)
1243 {
1244     PyUnicodeObject *_u_string = NULL;
1245     int result = 0;
1246     int i;
1247 
1248     /* Yes, supports only 8bit encodings */
1249     _u_string = (PyUnicodeObject *)
1250         PyUnicode_Decode(template_buffer, 256, name, "replace");
1251 
1252     if (_u_string == NULL)
1253         return result;
1254 
1255     if (PyUnicode_GET_SIZE(_u_string) != 256) {
1256         Py_DECREF(_u_string);
1257         PyErr_SetString(PyExc_ValueError,
1258                         "multi-byte encodings are not supported");
1259         return result;
1260     }
1261 
1262     for (i = 0; i < 256; i++) {
1263         /* Stupid to access directly, but fast */
1264         Py_UNICODE c = _u_string->str[i];
1265         if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1266             info->map[i] = -1;
1267         else
1268             info->map[i] = c;
1269     }
1270     info->data = NULL;
1271     info->convert = NULL;
1272     info->release = NULL;
1273     result = 1;
1274     Py_DECREF(_u_string);
1275     return result;
1276 }
1277 
1278 #endif
1279 
1280 static PyObject *
newxmlparseobject(char * encoding,char * namespace_separator,PyObject * intern)1281 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1282 {
1283     int i;
1284     xmlparseobject *self;
1285 
1286 #ifdef Py_TPFLAGS_HAVE_GC
1287     /* Code for versions 2.2 and later */
1288     self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1289 #else
1290     self = PyObject_New(xmlparseobject, &Xmlparsetype);
1291 #endif
1292     if (self == NULL)
1293         return NULL;
1294 
1295 #ifdef Py_USING_UNICODE
1296     self->returns_unicode = 1;
1297 #else
1298     self->returns_unicode = 0;
1299 #endif
1300 
1301     self->buffer = NULL;
1302     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1303     self->buffer_used = 0;
1304     self->ordered_attributes = 0;
1305     self->specified_attributes = 0;
1306     self->in_callback = 0;
1307     self->ns_prefixes = 0;
1308     self->handlers = NULL;
1309     if (namespace_separator != NULL) {
1310         self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1311     }
1312     else {
1313         self->itself = XML_ParserCreate(encoding);
1314     }
1315     if (self->itself == NULL) {
1316         PyErr_SetString(PyExc_RuntimeError,
1317                         "XML_ParserCreate failed");
1318         Py_DECREF(self);
1319         return NULL;
1320     }
1321 #if XML_COMBINED_VERSION >= 20100
1322     /* This feature was added upstream in libexpat 2.1.0. */
1323     XML_SetHashSalt(self->itself,
1324                     (unsigned long)_Py_HashSecret.prefix);
1325 #endif
1326     self->intern = intern;
1327     Py_XINCREF(self->intern);
1328 #ifdef Py_TPFLAGS_HAVE_GC
1329     PyObject_GC_Track(self);
1330 #else
1331     PyObject_GC_Init(self);
1332 #endif
1333     XML_SetUserData(self->itself, (void *)self);
1334 #ifdef Py_USING_UNICODE
1335     XML_SetUnknownEncodingHandler(self->itself,
1336                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1337 #endif
1338 
1339     for (i = 0; handler_info[i].name != NULL; i++)
1340         /* do nothing */;
1341 
1342     self->handlers = malloc(sizeof(PyObject *) * i);
1343     if (!self->handlers) {
1344         Py_DECREF(self);
1345         return PyErr_NoMemory();
1346     }
1347     clear_handlers(self, 1);
1348 
1349     return (PyObject*)self;
1350 }
1351 
1352 
1353 static void
xmlparse_dealloc(xmlparseobject * self)1354 xmlparse_dealloc(xmlparseobject *self)
1355 {
1356     int i;
1357 #ifdef Py_TPFLAGS_HAVE_GC
1358     PyObject_GC_UnTrack(self);
1359 #else
1360     PyObject_GC_Fini(self);
1361 #endif
1362     if (self->itself != NULL)
1363         XML_ParserFree(self->itself);
1364     self->itself = NULL;
1365 
1366     if (self->handlers != NULL) {
1367         PyObject *temp;
1368         for (i = 0; handler_info[i].name != NULL; i++) {
1369             temp = self->handlers[i];
1370             self->handlers[i] = NULL;
1371             Py_XDECREF(temp);
1372         }
1373         free(self->handlers);
1374         self->handlers = NULL;
1375     }
1376     if (self->buffer != NULL) {
1377         free(self->buffer);
1378         self->buffer = NULL;
1379     }
1380     Py_XDECREF(self->intern);
1381 #ifndef Py_TPFLAGS_HAVE_GC
1382     /* Code for versions 2.0 and 2.1 */
1383     PyObject_Del(self);
1384 #else
1385     /* Code for versions 2.2 and later. */
1386     PyObject_GC_Del(self);
1387 #endif
1388 }
1389 
1390 static int
handlername2int(const char * name)1391 handlername2int(const char *name)
1392 {
1393     int i;
1394     for (i = 0; handler_info[i].name != NULL; i++) {
1395         if (strcmp(name, handler_info[i].name) == 0) {
1396             return i;
1397         }
1398     }
1399     return -1;
1400 }
1401 
1402 static PyObject *
get_pybool(int istrue)1403 get_pybool(int istrue)
1404 {
1405     PyObject *result = istrue ? Py_True : Py_False;
1406     Py_INCREF(result);
1407     return result;
1408 }
1409 
/* tp_getattr slot for xmlparser objects.

   Lookup order:
     1. handler names from handler_info (an unset handler reads as None);
     2. the Error* and Current* position attributes, read from Expat;
     3. the buffer_* attributes and the boolean option attributes;
     4. "intern" (None when no dictionary is attached);
     5. "__members__", a list of all the attribute names above;
     6. anything else is looked up in xmlparse_methods via Py_FindMethod().

   Returns a new reference, or NULL when creating the result fails or
   Py_FindMethod() returns NULL. */
static PyObject *
xmlparse_getattr(xmlparseobject *self, char *name)
{
    int handlernum = handlername2int(name);

    /* handlername2int() returns -1 for names that are not handlers. */
    if (handlernum != -1) {
        PyObject *result = self->handlers[handlernum];
        if (result == NULL)
            result = Py_None;
        Py_INCREF(result);
        return result;
    }
    /* The first-character tests only skip string comparisons that cannot
       match. */
    if (name[0] == 'E') {
        if (strcmp(name, "ErrorCode") == 0)
            return PyInt_FromLong((long)
                                  XML_GetErrorCode(self->itself));
        if (strcmp(name, "ErrorLineNumber") == 0)
            return PyInt_FromLong((long)
                                  XML_GetErrorLineNumber(self->itself));
        if (strcmp(name, "ErrorColumnNumber") == 0)
            return PyInt_FromLong((long)
                                  XML_GetErrorColumnNumber(self->itself));
        if (strcmp(name, "ErrorByteIndex") == 0)
            return PyInt_FromLong((long)
                                  XML_GetErrorByteIndex(self->itself));
    }
    if (name[0] == 'C') {
        if (strcmp(name, "CurrentLineNumber") == 0)
            return PyInt_FromLong((long)
                                  XML_GetCurrentLineNumber(self->itself));
        if (strcmp(name, "CurrentColumnNumber") == 0)
            return PyInt_FromLong((long)
                                  XML_GetCurrentColumnNumber(self->itself));
        if (strcmp(name, "CurrentByteIndex") == 0)
            return PyInt_FromLong((long)
                                  XML_GetCurrentByteIndex(self->itself));
    }
    if (name[0] == 'b') {
        if (strcmp(name, "buffer_size") == 0)
            return PyInt_FromLong((long) self->buffer_size);
        /* buffer_text is true exactly when a character buffer exists. */
        if (strcmp(name, "buffer_text") == 0)
            return get_pybool(self->buffer != NULL);
        if (strcmp(name, "buffer_used") == 0)
            return PyInt_FromLong((long) self->buffer_used);
    }
    if (strcmp(name, "namespace_prefixes") == 0)
        return get_pybool(self->ns_prefixes);
    if (strcmp(name, "ordered_attributes") == 0)
        return get_pybool(self->ordered_attributes);
    if (strcmp(name, "returns_unicode") == 0)
        return get_pybool((long) self->returns_unicode);
    if (strcmp(name, "specified_attributes") == 0)
        return get_pybool((long) self->specified_attributes);
    if (strcmp(name, "intern") == 0) {
        if (self->intern == NULL) {
            Py_INCREF(Py_None);
            return Py_None;
        }
        else {
            Py_INCREF(self->intern);
            return self->intern;
        }
    }

/* Append a C string to `list` as a Python string.  A failed string creation
   skips the append, and the result of PyList_Append() is not checked. */
#define APPEND(list, str)                               \
        do {                                            \
                PyObject *o = PyString_FromString(str); \
                if (o != NULL)                          \
                        PyList_Append(list, o);         \
                Py_XDECREF(o);                          \
        } while (0)

    if (strcmp(name, "__members__") == 0) {
        int i;
        PyObject *rc = PyList_New(0);
        if (!rc)
                return NULL;
        /* Handler names first, then the fixed attribute names. */
        for (i = 0; handler_info[i].name != NULL; i++) {
            PyObject *o = get_handler_name(&handler_info[i]);
            if (o != NULL)
                PyList_Append(rc, o);
            Py_XDECREF(o);
        }
        APPEND(rc, "ErrorCode");
        APPEND(rc, "ErrorLineNumber");
        APPEND(rc, "ErrorColumnNumber");
        APPEND(rc, "ErrorByteIndex");
        APPEND(rc, "CurrentLineNumber");
        APPEND(rc, "CurrentColumnNumber");
        APPEND(rc, "CurrentByteIndex");
        APPEND(rc, "buffer_size");
        APPEND(rc, "buffer_text");
        APPEND(rc, "buffer_used");
        APPEND(rc, "namespace_prefixes");
        APPEND(rc, "ordered_attributes");
        APPEND(rc, "returns_unicode");
        APPEND(rc, "specified_attributes");
        APPEND(rc, "intern");

#undef APPEND
        return rc;
    }
    return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
}
1514 
1515 static int
sethandler(xmlparseobject * self,const char * name,PyObject * v)1516 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1517 {
1518     int handlernum = handlername2int(name);
1519     if (handlernum >= 0) {
1520         xmlhandler c_handler = NULL;
1521         PyObject *temp = self->handlers[handlernum];
1522 
1523         if (v == Py_None) {
1524             /* If this is the character data handler, and a character
1525                data handler is already active, we need to be more
1526                careful.  What we can safely do is replace the existing
1527                character data handler callback function with a no-op
1528                function that will refuse to call Python.  The downside
1529                is that this doesn't completely remove the character
1530                data handler from the C layer if there's any callback
1531                active, so Expat does a little more work than it
1532                otherwise would, but that's really an odd case.  A more
1533                elaborate system of handlers and state could remove the
1534                C handler more effectively. */
1535             if (handlernum == CharacterData && self->in_callback)
1536                 c_handler = noop_character_data_handler;
1537             v = NULL;
1538         }
1539         else if (v != NULL) {
1540             Py_INCREF(v);
1541             c_handler = handler_info[handlernum].handler;
1542         }
1543         self->handlers[handlernum] = v;
1544         Py_XDECREF(temp);
1545         handler_info[handlernum].setter(self->itself, c_handler);
1546         return 1;
1547     }
1548     return 0;
1549 }
1550 
1551 static int
xmlparse_setattr(xmlparseobject * self,char * name,PyObject * v)1552 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1553 {
1554     /* Set attribute 'name' to value 'v'. v==NULL means delete */
1555     if (v == NULL) {
1556         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1557         return -1;
1558     }
1559     if (strcmp(name, "buffer_text") == 0) {
1560         int b = PyObject_IsTrue(v);
1561         if (b < 0)
1562             return -1;
1563         if (b) {
1564             if (self->buffer == NULL) {
1565                 self->buffer = malloc(self->buffer_size);
1566                 if (self->buffer == NULL) {
1567                     PyErr_NoMemory();
1568                     return -1;
1569                 }
1570                 self->buffer_used = 0;
1571             }
1572         }
1573         else if (self->buffer != NULL) {
1574             if (flush_character_buffer(self) < 0)
1575                 return -1;
1576             free(self->buffer);
1577             self->buffer = NULL;
1578         }
1579         return 0;
1580     }
1581     if (strcmp(name, "namespace_prefixes") == 0) {
1582         int b = PyObject_IsTrue(v);
1583         if (b < 0)
1584             return -1;
1585         self->ns_prefixes = b;
1586         XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1587         return 0;
1588     }
1589     if (strcmp(name, "ordered_attributes") == 0) {
1590         int b = PyObject_IsTrue(v);
1591         if (b < 0)
1592             return -1;
1593         self->ordered_attributes = b;
1594         return 0;
1595     }
1596     if (strcmp(name, "returns_unicode") == 0) {
1597         int b = PyObject_IsTrue(v);
1598         if (b < 0)
1599             return -1;
1600 #ifndef Py_USING_UNICODE
1601         if (b) {
1602             PyErr_SetString(PyExc_ValueError,
1603                             "Unicode support not available");
1604             return -1;
1605         }
1606 #endif
1607         self->returns_unicode = b;
1608         return 0;
1609     }
1610     if (strcmp(name, "specified_attributes") == 0) {
1611         int b = PyObject_IsTrue(v);
1612         if (b < 0)
1613             return -1;
1614         self->specified_attributes = b;
1615         return 0;
1616     }
1617 
1618     if (strcmp(name, "buffer_size") == 0) {
1619       long new_buffer_size;
1620       if (!PyInt_Check(v)) {
1621         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1622         return -1;
1623       }
1624 
1625       new_buffer_size=PyInt_AS_LONG(v);
1626       /* trivial case -- no change */
1627       if (new_buffer_size == self->buffer_size) {
1628         return 0;
1629       }
1630 
1631       if (new_buffer_size <= 0) {
1632         PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1633         return -1;
1634       }
1635 
1636       /* check maximum */
1637       if (new_buffer_size > INT_MAX) {
1638         char errmsg[100];
1639         sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1640         PyErr_SetString(PyExc_ValueError, errmsg);
1641         return -1;
1642       }
1643 
1644       if (self->buffer != NULL) {
1645         /* there is already a buffer */
1646         if (self->buffer_used != 0) {
1647           flush_character_buffer(self);
1648         }
1649         /* free existing buffer */
1650         free(self->buffer);
1651       }
1652       self->buffer = malloc(new_buffer_size);
1653       if (self->buffer == NULL) {
1654         PyErr_NoMemory();
1655         return -1;
1656       }
1657       self->buffer_size = new_buffer_size;
1658       return 0;
1659     }
1660 
1661     if (strcmp(name, "CharacterDataHandler") == 0) {
1662         /* If we're changing the character data handler, flush all
1663          * cached data with the old handler.  Not sure there's a
1664          * "right" thing to do, though, but this probably won't
1665          * happen.
1666          */
1667         if (flush_character_buffer(self) < 0)
1668             return -1;
1669     }
1670     if (sethandler(self, name, v)) {
1671         return 0;
1672     }
1673     PyErr_SetString(PyExc_AttributeError, name);
1674     return -1;
1675 }
1676 
1677 #ifdef WITH_CYCLE_GC
1678 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1679 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1680 {
1681     int i;
1682     for (i = 0; handler_info[i].name != NULL; i++)
1683         Py_VISIT(op->handlers[i]);
1684     return 0;
1685 }
1686 
1687 static int
xmlparse_clear(xmlparseobject * op)1688 xmlparse_clear(xmlparseobject *op)
1689 {
1690     clear_handlers(op, 0);
1691     Py_CLEAR(op->intern);
1692     return 0;
1693 }
1694 #endif
1695 
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Type object for xmlparser instances ("pyexpat.xmlparser").

   Attribute access goes through the char*-based tp_getattr/tp_setattr
   slots (xmlparse_getattr / xmlparse_setattr); tp_getattro/tp_setattro are
   left empty.  The type takes part in cyclic garbage collection:
   tp_traverse and tp_clear are only filled in when WITH_CYCLE_GC is
   defined.  The type slot of the header is initialized with NULL here. */
static PyTypeObject Xmlparsetype = {
        PyVarObject_HEAD_INIT(NULL, 0)
        "pyexpat.xmlparser",            /*tp_name*/
        sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
        0,                              /*tp_itemsize*/
        /* methods */
        (destructor)xmlparse_dealloc,   /*tp_dealloc*/
        (printfunc)0,           /*tp_print*/
        (getattrfunc)xmlparse_getattr,  /*tp_getattr*/
        (setattrfunc)xmlparse_setattr,  /*tp_setattr*/
        (cmpfunc)0,             /*tp_compare*/
        (reprfunc)0,            /*tp_repr*/
        0,                      /*tp_as_number*/
        0,              /*tp_as_sequence*/
        0,              /*tp_as_mapping*/
        (hashfunc)0,            /*tp_hash*/
        (ternaryfunc)0,         /*tp_call*/
        (reprfunc)0,            /*tp_str*/
        0,              /* tp_getattro */
        0,              /* tp_setattro */
        0,              /* tp_as_buffer */
#ifdef Py_TPFLAGS_HAVE_GC
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
#else
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
#endif
        Xmlparsetype__doc__, /* tp_doc - Documentation string */
#ifdef WITH_CYCLE_GC
        (traverseproc)xmlparse_traverse,        /* tp_traverse */
        (inquiry)xmlparse_clear         /* tp_clear */
#else
        0, 0
#endif
};
1732 
1733 /* End of code for xmlparser objects */
1734 /* -------------------------------------------------------- */
1735 
1736 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1737 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1738 Return a new XML parser object.");
1739 
1740 static PyObject *
pyexpat_ParserCreate(PyObject * notused,PyObject * args,PyObject * kw)1741 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1742 {
1743     char *encoding = NULL;
1744     char *namespace_separator = NULL;
1745     PyObject *intern = NULL;
1746     PyObject *result;
1747     int intern_decref = 0;
1748     static char *kwlist[] = {"encoding", "namespace_separator",
1749                                    "intern", NULL};
1750 
1751     if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1752                                      &encoding, &namespace_separator, &intern))
1753         return NULL;
1754     if (namespace_separator != NULL
1755         && strlen(namespace_separator) > 1) {
1756         PyErr_SetString(PyExc_ValueError,
1757                         "namespace_separator must be at most one"
1758                         " character, omitted, or None");
1759         return NULL;
1760     }
1761     /* Explicitly passing None means no interning is desired.
1762        Not passing anything means that a new dictionary is used. */
1763     if (intern == Py_None)
1764         intern = NULL;
1765     else if (intern == NULL) {
1766         intern = PyDict_New();
1767         if (!intern)
1768             return NULL;
1769         intern_decref = 1;
1770     }
1771     else if (!PyDict_Check(intern)) {
1772         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1773         return NULL;
1774     }
1775 
1776     result = newxmlparseobject(encoding, namespace_separator, intern);
1777     if (intern_decref) {
1778         Py_DECREF(intern);
1779     }
1780     return result;
1781 }
1782 
1783 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1784 "ErrorString(errno) -> string\n\
1785 Returns string error for given number.");
1786 
1787 static PyObject *
pyexpat_ErrorString(PyObject * self,PyObject * args)1788 pyexpat_ErrorString(PyObject *self, PyObject *args)
1789 {
1790     long code = 0;
1791 
1792     if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1793         return NULL;
1794     return Py_BuildValue("z", XML_ErrorString((int)code));
1795 }
1796 
1797 /* List of methods defined in the module */
1798 
1799 static struct PyMethodDef pyexpat_methods[] = {
1800     {"ParserCreate",    (PyCFunction)pyexpat_ParserCreate,
1801      METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1802     {"ErrorString",     (PyCFunction)pyexpat_ErrorString,
1803      METH_VARARGS,      pyexpat_ErrorString__doc__},
1804 
1805     {NULL,       (PyCFunction)NULL, 0, NULL}            /* sentinel */
1806 };
1807 
1808 /* Module docstring */
1809 
1810 PyDoc_STRVAR(pyexpat_module_documentation,
1811 "Python wrapper for Expat parser.");
1812 
1813 /* Initialization function for the module */
1814 
1815 #ifndef MODULE_NAME
1816 #define MODULE_NAME "pyexpat"
1817 #endif
1818 
1819 #ifndef MODULE_INITFUNC
1820 #define MODULE_INITFUNC initpyexpat
1821 #endif
1822 
1823 #ifndef PyMODINIT_FUNC
1824 #   ifdef MS_WINDOWS
1825 #       define PyMODINIT_FUNC __declspec(dllexport) void
1826 #   else
1827 #       define PyMODINIT_FUNC void
1828 #   endif
1829 #endif
1830 
1831 PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */
1832 
1833 PyMODINIT_FUNC
MODULE_INITFUNC(void)1834 MODULE_INITFUNC(void)
1835 {
1836     PyObject *m, *d;
1837     PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1838     PyObject *errors_module;
1839     PyObject *modelmod_name;
1840     PyObject *model_module;
1841     PyObject *sys_modules;
1842     PyObject *version;
1843     static struct PyExpat_CAPI capi;
1844     PyObject* capi_object;
1845 
1846     if (errmod_name == NULL)
1847         return;
1848     modelmod_name = PyString_FromString(MODULE_NAME ".model");
1849     if (modelmod_name == NULL)
1850         return;
1851 
1852     Py_TYPE(&Xmlparsetype) = &PyType_Type;
1853 
1854     /* Create the module and add the functions */
1855     m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1856                        pyexpat_module_documentation);
1857     if (m == NULL)
1858         return;
1859 
1860     /* Add some symbolic constants to the module */
1861     if (ErrorObject == NULL) {
1862         ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1863                                          NULL, NULL);
1864         if (ErrorObject == NULL)
1865             return;
1866     }
1867     Py_INCREF(ErrorObject);
1868     PyModule_AddObject(m, "error", ErrorObject);
1869     Py_INCREF(ErrorObject);
1870     PyModule_AddObject(m, "ExpatError", ErrorObject);
1871     Py_INCREF(&Xmlparsetype);
1872     PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1873 
1874     version = PyString_FromString(PY_VERSION);
1875     if (!version)
1876         return;
1877     PyModule_AddObject(m, "__version__", version);
1878     PyModule_AddStringConstant(m, "EXPAT_VERSION",
1879                                (char *) XML_ExpatVersion());
1880     {
1881         XML_Expat_Version info = XML_ExpatVersionInfo();
1882         PyModule_AddObject(m, "version_info",
1883                            Py_BuildValue("(iii)", info.major,
1884                                          info.minor, info.micro));
1885     }
1886 #ifdef Py_USING_UNICODE
1887     init_template_buffer();
1888 #endif
1889     /* XXX When Expat supports some way of figuring out how it was
1890        compiled, this should check and set native_encoding
1891        appropriately.
1892     */
1893     PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1894 
1895     sys_modules = PySys_GetObject("modules");
1896     d = PyModule_GetDict(m);
1897     errors_module = PyDict_GetItem(d, errmod_name);
1898     if (errors_module == NULL) {
1899         errors_module = PyModule_New(MODULE_NAME ".errors");
1900         if (errors_module != NULL) {
1901             PyDict_SetItem(sys_modules, errmod_name, errors_module);
1902             /* gives away the reference to errors_module */
1903             PyModule_AddObject(m, "errors", errors_module);
1904         }
1905     }
1906     Py_DECREF(errmod_name);
1907     model_module = PyDict_GetItem(d, modelmod_name);
1908     if (model_module == NULL) {
1909         model_module = PyModule_New(MODULE_NAME ".model");
1910         if (model_module != NULL) {
1911             PyDict_SetItem(sys_modules, modelmod_name, model_module);
1912             /* gives away the reference to model_module */
1913             PyModule_AddObject(m, "model", model_module);
1914         }
1915     }
1916     Py_DECREF(modelmod_name);
1917     if (errors_module == NULL || model_module == NULL)
1918         /* Don't core dump later! */
1919         return;
1920 
1921 #if XML_COMBINED_VERSION > 19505
1922     {
1923         const XML_Feature *features = XML_GetFeatureList();
1924         PyObject *list = PyList_New(0);
1925         if (list == NULL)
1926             /* just ignore it */
1927             PyErr_Clear();
1928         else {
1929             int i = 0;
1930             for (; features[i].feature != XML_FEATURE_END; ++i) {
1931                 int ok;
1932                 PyObject *item = Py_BuildValue("si", features[i].name,
1933                                                features[i].value);
1934                 if (item == NULL) {
1935                     Py_DECREF(list);
1936                     list = NULL;
1937                     break;
1938                 }
1939                 ok = PyList_Append(list, item);
1940                 Py_DECREF(item);
1941                 if (ok < 0) {
1942                     PyErr_Clear();
1943                     break;
1944                 }
1945             }
1946             if (list != NULL)
1947                 PyModule_AddObject(m, "features", list);
1948         }
1949     }
1950 #endif
1951 
1952 #define MYCONST(name) \
1953     PyModule_AddStringConstant(errors_module, #name, \
1954                                (char*)XML_ErrorString(name))
1955 
1956     MYCONST(XML_ERROR_NO_MEMORY);
1957     MYCONST(XML_ERROR_SYNTAX);
1958     MYCONST(XML_ERROR_NO_ELEMENTS);
1959     MYCONST(XML_ERROR_INVALID_TOKEN);
1960     MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1961     MYCONST(XML_ERROR_PARTIAL_CHAR);
1962     MYCONST(XML_ERROR_TAG_MISMATCH);
1963     MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1964     MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1965     MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1966     MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1967     MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1968     MYCONST(XML_ERROR_ASYNC_ENTITY);
1969     MYCONST(XML_ERROR_BAD_CHAR_REF);
1970     MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1971     MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1972     MYCONST(XML_ERROR_MISPLACED_XML_PI);
1973     MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1974     MYCONST(XML_ERROR_INCORRECT_ENCODING);
1975     MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1976     MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1977     MYCONST(XML_ERROR_NOT_STANDALONE);
1978     MYCONST(XML_ERROR_UNEXPECTED_STATE);
1979     MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1980     MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1981     MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1982     /* Added in Expat 1.95.7. */
1983     MYCONST(XML_ERROR_UNBOUND_PREFIX);
1984     /* Added in Expat 1.95.8. */
1985     MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1986     MYCONST(XML_ERROR_INCOMPLETE_PE);
1987     MYCONST(XML_ERROR_XML_DECL);
1988     MYCONST(XML_ERROR_TEXT_DECL);
1989     MYCONST(XML_ERROR_PUBLICID);
1990     MYCONST(XML_ERROR_SUSPENDED);
1991     MYCONST(XML_ERROR_NOT_SUSPENDED);
1992     MYCONST(XML_ERROR_ABORTED);
1993     MYCONST(XML_ERROR_FINISHED);
1994     MYCONST(XML_ERROR_SUSPEND_PE);
1995 
1996     PyModule_AddStringConstant(errors_module, "__doc__",
1997                                "Constants used to describe error conditions.");
1998 
1999 #undef MYCONST
2000 
2001 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
2002     MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
2003     MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2004     MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
2005 #undef MYCONST
2006 
2007 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2008     PyModule_AddStringConstant(model_module, "__doc__",
2009                      "Constants used to interpret content model information.");
2010 
2011     MYCONST(XML_CTYPE_EMPTY);
2012     MYCONST(XML_CTYPE_ANY);
2013     MYCONST(XML_CTYPE_MIXED);
2014     MYCONST(XML_CTYPE_NAME);
2015     MYCONST(XML_CTYPE_CHOICE);
2016     MYCONST(XML_CTYPE_SEQ);
2017 
2018     MYCONST(XML_CQUANT_NONE);
2019     MYCONST(XML_CQUANT_OPT);
2020     MYCONST(XML_CQUANT_REP);
2021     MYCONST(XML_CQUANT_PLUS);
2022 #undef MYCONST
2023 
2024     /* initialize pyexpat dispatch table */
2025     capi.size = sizeof(capi);
2026     capi.magic = PyExpat_CAPI_MAGIC;
2027     capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2028     capi.MINOR_VERSION = XML_MINOR_VERSION;
2029     capi.MICRO_VERSION = XML_MICRO_VERSION;
2030     capi.ErrorString = XML_ErrorString;
2031     capi.GetErrorCode = XML_GetErrorCode;
2032     capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2033     capi.GetErrorLineNumber = XML_GetErrorLineNumber;
2034     capi.Parse = XML_Parse;
2035     capi.ParserCreate_MM = XML_ParserCreate_MM;
2036     capi.ParserFree = XML_ParserFree;
2037     capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2038     capi.SetCommentHandler = XML_SetCommentHandler;
2039     capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2040     capi.SetElementHandler = XML_SetElementHandler;
2041     capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2042     capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2043     capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2044     capi.SetUserData = XML_SetUserData;
2045 
2046     /* export using capsule */
2047     capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
2048     if (capi_object)
2049         PyModule_AddObject(m, "expat_CAPI", capi_object);
2050 }
2051 
2052 static void
clear_handlers(xmlparseobject * self,int initial)2053 clear_handlers(xmlparseobject *self, int initial)
2054 {
2055     int i = 0;
2056     PyObject *temp;
2057 
2058     for (; handler_info[i].name != NULL; i++) {
2059         if (initial)
2060             self->handlers[i] = NULL;
2061         else {
2062             temp = self->handlers[i];
2063             self->handlers[i] = NULL;
2064             Py_XDECREF(temp);
2065             handler_info[i].setter(self->itself, NULL);
2066         }
2067     }
2068 }
2069 
2070 static struct HandlerInfo handler_info[] = {
2071     {"StartElementHandler",
2072      (xmlhandlersetter)XML_SetStartElementHandler,
2073      (xmlhandler)my_StartElementHandler},
2074     {"EndElementHandler",
2075      (xmlhandlersetter)XML_SetEndElementHandler,
2076      (xmlhandler)my_EndElementHandler},
2077     {"ProcessingInstructionHandler",
2078      (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2079      (xmlhandler)my_ProcessingInstructionHandler},
2080     {"CharacterDataHandler",
2081      (xmlhandlersetter)XML_SetCharacterDataHandler,
2082      (xmlhandler)my_CharacterDataHandler},
2083     {"UnparsedEntityDeclHandler",
2084      (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
2085      (xmlhandler)my_UnparsedEntityDeclHandler},
2086     {"NotationDeclHandler",
2087      (xmlhandlersetter)XML_SetNotationDeclHandler,
2088      (xmlhandler)my_NotationDeclHandler},
2089     {"StartNamespaceDeclHandler",
2090      (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
2091      (xmlhandler)my_StartNamespaceDeclHandler},
2092     {"EndNamespaceDeclHandler",
2093      (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
2094      (xmlhandler)my_EndNamespaceDeclHandler},
2095     {"CommentHandler",
2096      (xmlhandlersetter)XML_SetCommentHandler,
2097      (xmlhandler)my_CommentHandler},
2098     {"StartCdataSectionHandler",
2099      (xmlhandlersetter)XML_SetStartCdataSectionHandler,
2100      (xmlhandler)my_StartCdataSectionHandler},
2101     {"EndCdataSectionHandler",
2102      (xmlhandlersetter)XML_SetEndCdataSectionHandler,
2103      (xmlhandler)my_EndCdataSectionHandler},
2104     {"DefaultHandler",
2105      (xmlhandlersetter)XML_SetDefaultHandler,
2106      (xmlhandler)my_DefaultHandler},
2107     {"DefaultHandlerExpand",
2108      (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2109      (xmlhandler)my_DefaultHandlerExpandHandler},
2110     {"NotStandaloneHandler",
2111      (xmlhandlersetter)XML_SetNotStandaloneHandler,
2112      (xmlhandler)my_NotStandaloneHandler},
2113     {"ExternalEntityRefHandler",
2114      (xmlhandlersetter)XML_SetExternalEntityRefHandler,
2115      (xmlhandler)my_ExternalEntityRefHandler},
2116     {"StartDoctypeDeclHandler",
2117      (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
2118      (xmlhandler)my_StartDoctypeDeclHandler},
2119     {"EndDoctypeDeclHandler",
2120      (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
2121      (xmlhandler)my_EndDoctypeDeclHandler},
2122     {"EntityDeclHandler",
2123      (xmlhandlersetter)XML_SetEntityDeclHandler,
2124      (xmlhandler)my_EntityDeclHandler},
2125     {"XmlDeclHandler",
2126      (xmlhandlersetter)XML_SetXmlDeclHandler,
2127      (xmlhandler)my_XmlDeclHandler},
2128     {"ElementDeclHandler",
2129      (xmlhandlersetter)XML_SetElementDeclHandler,
2130      (xmlhandler)my_ElementDeclHandler},
2131     {"AttlistDeclHandler",
2132      (xmlhandlersetter)XML_SetAttlistDeclHandler,
2133      (xmlhandler)my_AttlistDeclHandler},
2134 #if XML_COMBINED_VERSION >= 19504
2135     {"SkippedEntityHandler",
2136      (xmlhandlersetter)XML_SetSkippedEntityHandler,
2137      (xmlhandler)my_SkippedEntityHandler},
2138 #endif
2139 
2140     {NULL, NULL, NULL} /* sentinel */
2141 };
2142