1 /*
2  * ElementTree
3  * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
4  *
5  * elementtree accelerator
6  *
7  * History:
8  * 1999-06-20 fl  created (as part of sgmlop)
9  * 2001-05-29 fl  effdom edition
10  * 2003-02-27 fl  elementtree edition (alpha)
11  * 2004-06-03 fl  updates for elementtree 1.2
12  * 2005-01-05 fl  major optimization effort
13  * 2005-01-11 fl  first public release (cElementTree 0.8)
14  * 2005-01-12 fl  split element object into base and extras
15  * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9)
16  * 2005-01-17 fl  added treebuilder close method
17  * 2005-01-17 fl  fixed crash in getchildren
18  * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3)
19  * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8)
20  * 2005-01-26 fl  added VERSION module property (cElementTree 1.0)
21  * 2005-01-28 fl  added remove method (1.0.1)
22  * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2)
23  * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers
24  * 2005-03-26 fl  added Comment and PI support to XMLParser
25  * 2005-03-27 fl  event optimizations; complain about bogus events
26  * 2005-08-08 fl  fixed read error handling in parse
27  * 2005-08-11 fl  added runtime test for copy workaround (1.0.3)
28  * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4)
29  * 2005-12-16 fl  added support for non-standard encodings
30  * 2006-03-08 fl  fixed a couple of potential null-refs and leaks
31  * 2006-03-12 fl  merge in 2.5 ssize_t changes
32  * 2007-08-25 fl  call custom builder's close method from XMLParser
33  * 2007-08-31 fl  added iter, extend from ET 1.3
34  * 2007-09-01 fl  fixed ParseError exception, setslice source type, etc
35  * 2007-09-03 fl  fixed handling of negative insert indexes
36  * 2007-09-04 fl  added itertext from ET 1.3
37  * 2007-09-06 fl  added position attribute to ParseError exception
38  * 2008-06-06 fl  delay error reporting in iterparse (from Hrvoje Niksic)
39  *
40  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
41  * Copyright (c) 1999-2009 by Fredrik Lundh.
42  *
43  * info@pythonware.com
44  * http://www.pythonware.com
45  */
46 
47 /* Licensed to PSF under a Contributor Agreement. */
48 /* See http://www.python.org/psf/license for licensing details. */
49 
50 #include "Python.h"
51 
/* Version string of this accelerator. */
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to build the expat-based XMLParser type. */
#define USE_EXPAT

/* Define this to route every expat call through the expat library
   embedded in pyexpat. */
/* #define USE_PYEXPAT_CAPI */

/* Number of child slots stored inline in an element, i.e. how many
   children it can hold before a separate buffer has to be allocated.

   For best performance, choose a value such that 80-90% of all nodes
   have no more than this many children.  Set it to zero to minimize
   the size of the element structure itself (this only helps if you
   have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
#define STATIC_CHILDREN 4
76 
77 /* -------------------------------------------------------------------- */
78 
/* Allocation tracing hooks.  Change the condition below to 1 to keep a
   running byte total that is printed on every ALLOC/RELEASE; in the
   normal build both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   'type'; MSVC builds additionally request inlining and __fastcall */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96 
/* compatibility macros for older Python versions */

/* before 2.6: supply the Py_REFCNT / Py_TYPE accessors */
#if (PY_VERSION_HEX < 0x02060000)
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* before 2.5: no Py_ssize_t and no lenfunc slot type */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4: fall back to the non-exact dict check */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#endif

/* before 2.4: provide Py_RETURN_NONE unless something already did */
#if (PY_VERSION_HEX < 0x02040000) && !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
115 
/* The 'join' flag is kept in the lowest bit of the text/tail object
   pointers.  Every use of text or tail as an object pointer must
   therefore go through JOIN_OBJ; see the comments in the ElementObject
   definition for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer p with its flag bit replaced by flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123 
124 /* glue functions (see the init function for details) */
125 static PyObject* elementtree_parseerror_obj;
126 static PyObject* elementtree_copyelement_obj;
127 static PyObject* elementtree_deepcopy_obj;
128 static PyObject* elementtree_iter_obj;
129 static PyObject* elementtree_itertext_obj;
130 static PyObject* elementpath_obj;
131 
132 /* helpers */
133 
134 /* Py_SETREF for a PyObject* that uses a join flag. */
135 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)136 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
137 {
138     PyObject *tmp = JOIN_OBJ(*p);
139     *p = new_joined_ptr;
140     Py_DECREF(tmp);
141 }
142 
143 LOCAL(PyObject*)
deepcopy(PyObject * object,PyObject * memo)144 deepcopy(PyObject* object, PyObject* memo)
145 {
146     /* do a deep copy of the given object */
147 
148     PyObject* args;
149     PyObject* result;
150 
151     if (!elementtree_deepcopy_obj) {
152         PyErr_SetString(
153             PyExc_RuntimeError,
154             "deepcopy helper not found"
155             );
156         return NULL;
157     }
158 
159     args = PyTuple_New(2);
160     if (!args)
161         return NULL;
162 
163     Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
164     Py_INCREF(memo);   PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
165 
166     result = PyObject_CallObject(elementtree_deepcopy_obj, args);
167 
168     Py_DECREF(args);
169 
170     return result;
171 }
172 
173 LOCAL(PyObject*)
list_join(PyObject * list)174 list_join(PyObject* list)
175 {
176     /* join list elements */
177     PyObject* joiner;
178     PyObject* function;
179     PyObject* args;
180     PyObject* result;
181 
182     switch (PyList_GET_SIZE(list)) {
183     case 0:
184         return PyString_FromString("");
185     case 1:
186         result = PyList_GET_ITEM(list, 0);
187         Py_INCREF(result);
188         return result;
189     }
190 
191     /* two or more elements: slice out a suitable separator from the
192        first member, and use that to join the entire list */
193 
194     joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
195     if (!joiner)
196         return NULL;
197 
198     function = PyObject_GetAttrString(joiner, "join");
199     if (!function) {
200         Py_DECREF(joiner);
201         return NULL;
202     }
203 
204     args = PyTuple_New(1);
205     if (!args) {
206         Py_DECREF(function);
207         Py_DECREF(joiner);
208         return NULL;
209     }
210 
211     Py_INCREF(list);
212     PyTuple_SET_ITEM(args, 0, list);
213 
214     result = PyObject_CallObject(function, args);
215 
216     Py_DECREF(args); /* also removes list */
217     Py_DECREF(function);
218     Py_DECREF(joiner);
219 
220     return result;
221 }
222 
223 /* -------------------------------------------------------------------- */
224 /* the element type */
225 
226 typedef struct {
227 
228     /* attributes (a dictionary object), or None if no attributes */
229     PyObject* attrib;
230 
231     /* child elements */
232     int length; /* actual number of items */
233     int allocated; /* allocated items */
234 
235     /* this either points to _children or to a malloced buffer */
236     PyObject* *children;
237 
238     PyObject* _children[STATIC_CHILDREN];
239 
240 } ElementObjectExtra;
241 
242 typedef struct {
243     PyObject_HEAD
244 
245     /* element tag (a string). */
246     PyObject* tag;
247 
248     /* text before first child.  note that this is a tagged pointer;
249        use JOIN_OBJ to get the object pointer.  the join flag is used
250        to distinguish lists created by the tree builder from lists
251        assigned to the attribute by application code; the former
252        should be joined before being returned to the user, the latter
253        should be left intact. */
254     PyObject* text;
255 
256     /* text after this element, in parent.  note that this is a tagged
257        pointer; use JOIN_OBJ to get the object pointer. */
258     PyObject* tail;
259 
260     ElementObjectExtra* extra;
261 
262 } ElementObject;
263 
264 staticforward PyTypeObject Element_Type;
265 
266 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
267 
268 /* -------------------------------------------------------------------- */
269 /* element constructor and destructor */
270 
271 LOCAL(int)
element_new_extra(ElementObject * self,PyObject * attrib)272 element_new_extra(ElementObject* self, PyObject* attrib)
273 {
274     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
275     if (!self->extra)
276         return -1;
277 
278     if (!attrib)
279         attrib = Py_None;
280 
281     Py_INCREF(attrib);
282     self->extra->attrib = attrib;
283 
284     self->extra->length = 0;
285     self->extra->allocated = STATIC_CHILDREN;
286     self->extra->children = self->extra->_children;
287 
288     return 0;
289 }
290 
291 LOCAL(void)
element_dealloc_extra(ElementObject * self)292 element_dealloc_extra(ElementObject* self)
293 {
294     int i;
295 
296     Py_DECREF(self->extra->attrib);
297 
298     for (i = 0; i < self->extra->length; i++)
299         Py_DECREF(self->extra->children[i]);
300 
301     if (self->extra->children != self->extra->_children)
302         PyObject_Free(self->extra->children);
303 
304     PyObject_Free(self->extra);
305 }
306 
307 LOCAL(PyObject*)
element_new(PyObject * tag,PyObject * attrib)308 element_new(PyObject* tag, PyObject* attrib)
309 {
310     ElementObject* self;
311 
312     self = PyObject_New(ElementObject, &Element_Type);
313     if (self == NULL)
314         return NULL;
315 
316     /* use None for empty dictionaries */
317     if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
318         attrib = Py_None;
319 
320     self->extra = NULL;
321 
322     if (attrib != Py_None) {
323 
324         if (element_new_extra(self, attrib) < 0) {
325             PyObject_Del(self);
326             return NULL;
327         }
328 
329         self->extra->length = 0;
330         self->extra->allocated = STATIC_CHILDREN;
331         self->extra->children = self->extra->_children;
332 
333     }
334 
335     Py_INCREF(tag);
336     self->tag = tag;
337 
338     Py_INCREF(Py_None);
339     self->text = Py_None;
340 
341     Py_INCREF(Py_None);
342     self->tail = Py_None;
343 
344     ALLOC(sizeof(ElementObject), "create element");
345 
346     return (PyObject*) self;
347 }
348 
349 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)350 element_resize(ElementObject* self, Py_ssize_t extra)
351 {
352     Py_ssize_t size;
353     PyObject* *children;
354 
355     /* make sure self->children can hold the given number of extra
356        elements.  set an exception and return -1 if allocation failed */
357 
358     if (!self->extra)
359         element_new_extra(self, NULL);
360 
361     size = self->extra->length + extra;
362 
363     if (size > self->extra->allocated) {
364         /* use Python 2.4's list growth strategy */
365         size = (size >> 3) + (size < 9 ? 3 : 6) + size;
366         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
367          * which needs at least 4 bytes.
368          * Although it's a false alarm always assume at least one child to
369          * be safe.
370          */
371         size = size ? size : 1;
372         if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
373             goto nomemory;
374         if (size > INT_MAX) {
375             PyErr_SetString(PyExc_OverflowError,
376                             "too many children");
377             return -1;
378         }
379         if (self->extra->children != self->extra->_children) {
380             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
381              * "children", which needs at least 4 bytes. Although it's a
382              * false alarm always assume at least one child to be safe.
383              */
384             children = PyObject_Realloc(self->extra->children,
385                                         size * sizeof(PyObject*));
386             if (!children)
387                 goto nomemory;
388         } else {
389             children = PyObject_Malloc(size * sizeof(PyObject*));
390             if (!children)
391                 goto nomemory;
392             /* copy existing children from static area to malloc buffer */
393             memcpy(children, self->extra->children,
394                    self->extra->length * sizeof(PyObject*));
395         }
396         self->extra->children = children;
397         self->extra->allocated = size;
398     }
399 
400     return 0;
401 
402   nomemory:
403     PyErr_NoMemory();
404     return -1;
405 }
406 
407 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)408 element_add_subelement(ElementObject* self, PyObject* element)
409 {
410     /* add a child element to a parent */
411 
412     if (element_resize(self, 1) < 0)
413         return -1;
414 
415     Py_INCREF(element);
416     self->extra->children[self->extra->length] = element;
417 
418     self->extra->length++;
419 
420     return 0;
421 }
422 
423 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)424 element_get_attrib(ElementObject* self)
425 {
426     /* return borrowed reference to attrib dictionary */
427     /* note: this function assumes that the extra section exists */
428 
429     PyObject* res = self->extra->attrib;
430 
431     if (res == Py_None) {
432         Py_DECREF(res);
433         /* create missing dictionary */
434         res = PyDict_New();
435         if (!res)
436             return NULL;
437         self->extra->attrib = res;
438     }
439 
440     return res;
441 }
442 
443 LOCAL(PyObject*)
element_get_text(ElementObject * self)444 element_get_text(ElementObject* self)
445 {
446     /* return borrowed reference to text attribute */
447 
448     PyObject *res = self->text;
449 
450     if (JOIN_GET(res)) {
451         res = JOIN_OBJ(res);
452         if (PyList_CheckExact(res)) {
453             PyObject *tmp = list_join(res);
454             if (!tmp)
455                 return NULL;
456             self->text = tmp;
457             Py_DECREF(res);
458             res = tmp;
459         }
460     }
461 
462     return res;
463 }
464 
465 LOCAL(PyObject*)
element_get_tail(ElementObject * self)466 element_get_tail(ElementObject* self)
467 {
468     /* return borrowed reference to text attribute */
469 
470     PyObject *res = self->tail;
471 
472     if (JOIN_GET(res)) {
473         res = JOIN_OBJ(res);
474         if (PyList_CheckExact(res)) {
475             PyObject *tmp = list_join(res);
476             if (!tmp)
477                 return NULL;
478             self->tail = tmp;
479             Py_DECREF(res);
480             res = tmp;
481         }
482     }
483 
484     return res;
485 }
486 
487 static PyObject*
element(PyObject * self,PyObject * args,PyObject * kw)488 element(PyObject* self, PyObject* args, PyObject* kw)
489 {
490     PyObject* elem;
491 
492     PyObject* tag;
493     PyObject* attrib = NULL;
494     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
495                           &PyDict_Type, &attrib))
496         return NULL;
497 
498     if (attrib || kw) {
499         attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
500         if (!attrib)
501             return NULL;
502         if (kw)
503             PyDict_Update(attrib, kw);
504     } else {
505         Py_INCREF(Py_None);
506         attrib = Py_None;
507     }
508 
509     elem = element_new(tag, attrib);
510 
511     Py_DECREF(attrib);
512 
513     return elem;
514 }
515 
516 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kw)517 subelement(PyObject* self, PyObject* args, PyObject* kw)
518 {
519     PyObject* elem;
520 
521     ElementObject* parent;
522     PyObject* tag;
523     PyObject* attrib = NULL;
524     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
525                           &Element_Type, &parent, &tag,
526                           &PyDict_Type, &attrib))
527         return NULL;
528 
529     if (attrib || kw) {
530         attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
531         if (!attrib)
532             return NULL;
533         if (kw)
534             PyDict_Update(attrib, kw);
535     } else {
536         Py_INCREF(Py_None);
537         attrib = Py_None;
538     }
539 
540     elem = element_new(tag, attrib);
541     Py_DECREF(attrib);
542     if (elem == NULL)
543         return NULL;
544 
545     if (element_add_subelement(parent, elem) < 0) {
546         Py_DECREF(elem);
547         return NULL;
548     }
549 
550     return elem;
551 }
552 
553 static void
element_dealloc(ElementObject * self)554 element_dealloc(ElementObject* self)
555 {
556     if (self->extra)
557         element_dealloc_extra(self);
558 
559     /* discard attributes */
560     Py_DECREF(self->tag);
561     Py_DECREF(JOIN_OBJ(self->text));
562     Py_DECREF(JOIN_OBJ(self->tail));
563 
564     RELEASE(sizeof(ElementObject), "destroy element");
565 
566     PyObject_Del(self);
567 }
568 
569 /* -------------------------------------------------------------------- */
570 /* methods (in alphabetical order) */
571 
572 static PyObject*
element_append(ElementObject * self,PyObject * args)573 element_append(ElementObject* self, PyObject* args)
574 {
575     PyObject* element;
576     if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
577         return NULL;
578 
579     if (element_add_subelement(self, element) < 0)
580         return NULL;
581 
582     Py_RETURN_NONE;
583 }
584 
585 static PyObject*
element_clear(ElementObject * self,PyObject * args)586 element_clear(ElementObject* self, PyObject* args)
587 {
588     if (!PyArg_ParseTuple(args, ":clear"))
589         return NULL;
590 
591     if (self->extra) {
592         element_dealloc_extra(self);
593         self->extra = NULL;
594     }
595 
596     Py_INCREF(Py_None);
597     _set_joined_ptr(&self->text, Py_None);
598 
599     Py_INCREF(Py_None);
600     _set_joined_ptr(&self->tail, Py_None);
601 
602     Py_RETURN_NONE;
603 }
604 
605 static PyObject*
element_copy(ElementObject * self,PyObject * args)606 element_copy(ElementObject* self, PyObject* args)
607 {
608     int i;
609     ElementObject* element;
610 
611     if (!PyArg_ParseTuple(args, ":__copy__"))
612         return NULL;
613 
614     element = (ElementObject*) element_new(
615         self->tag, (self->extra) ? self->extra->attrib : Py_None
616         );
617     if (!element)
618         return NULL;
619 
620     Py_INCREF(JOIN_OBJ(self->text));
621     _set_joined_ptr(&element->text, self->text);
622 
623     Py_INCREF(JOIN_OBJ(self->tail));
624     _set_joined_ptr(&element->tail, self->tail);
625 
626     if (self->extra) {
627 
628         if (element_resize(element, self->extra->length) < 0) {
629             Py_DECREF(element);
630             return NULL;
631         }
632 
633         for (i = 0; i < self->extra->length; i++) {
634             Py_INCREF(self->extra->children[i]);
635             element->extra->children[i] = self->extra->children[i];
636         }
637 
638         element->extra->length = self->extra->length;
639 
640     }
641 
642     return (PyObject*) element;
643 }
644 
645 static PyObject*
element_deepcopy(ElementObject * self,PyObject * args)646 element_deepcopy(ElementObject* self, PyObject* args)
647 {
648     int i;
649     ElementObject* element;
650     PyObject* tag;
651     PyObject* attrib;
652     PyObject* text;
653     PyObject* tail;
654     PyObject* id;
655 
656     PyObject* memo;
657     if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
658         return NULL;
659 
660     tag = deepcopy(self->tag, memo);
661     if (!tag)
662         return NULL;
663 
664     if (self->extra) {
665         attrib = deepcopy(self->extra->attrib, memo);
666         if (!attrib) {
667             Py_DECREF(tag);
668             return NULL;
669         }
670     } else {
671         Py_INCREF(Py_None);
672         attrib = Py_None;
673     }
674 
675     element = (ElementObject*) element_new(tag, attrib);
676 
677     Py_DECREF(tag);
678     Py_DECREF(attrib);
679 
680     if (!element)
681         return NULL;
682 
683     text = deepcopy(JOIN_OBJ(self->text), memo);
684     if (!text)
685         goto error;
686     _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
687 
688     tail = deepcopy(JOIN_OBJ(self->tail), memo);
689     if (!tail)
690         goto error;
691     _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
692 
693     if (self->extra) {
694 
695         if (element_resize(element, self->extra->length) < 0)
696             goto error;
697 
698         for (i = 0; i < self->extra->length; i++) {
699             PyObject* child = deepcopy(self->extra->children[i], memo);
700             if (!child) {
701                 element->extra->length = i;
702                 goto error;
703             }
704             element->extra->children[i] = child;
705         }
706 
707         element->extra->length = self->extra->length;
708 
709     }
710 
711     /* add object to memo dictionary (so deepcopy won't visit it again) */
712     id = PyInt_FromLong((Py_uintptr_t) self);
713     if (!id)
714         goto error;
715 
716     i = PyDict_SetItem(memo, id, (PyObject*) element);
717 
718     Py_DECREF(id);
719 
720     if (i < 0)
721         goto error;
722 
723     return (PyObject*) element;
724 
725   error:
726     Py_DECREF(element);
727     return NULL;
728 }
729 
730 LOCAL(int)
checkpath(PyObject * tag)731 checkpath(PyObject* tag)
732 {
733     Py_ssize_t i;
734     int check = 1;
735 
736     /* check if a tag contains an xpath character */
737 
738 #define PATHCHAR(ch) \
739     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
740 
741 #if defined(Py_USING_UNICODE)
742     if (PyUnicode_Check(tag)) {
743         Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
744         for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
745             if (p[i] == '{')
746                 check = 0;
747             else if (p[i] == '}')
748                 check = 1;
749             else if (check && PATHCHAR(p[i]))
750                 return 1;
751         }
752         return 0;
753     }
754 #endif
755     if (PyString_Check(tag)) {
756         char *p = PyString_AS_STRING(tag);
757         for (i = 0; i < PyString_GET_SIZE(tag); i++) {
758             if (p[i] == '{')
759                 check = 0;
760             else if (p[i] == '}')
761                 check = 1;
762             else if (check && PATHCHAR(p[i]))
763                 return 1;
764         }
765         return 0;
766     }
767 
768     return 1; /* unknown type; might be path expression */
769 }
770 
771 static PyObject*
element_extend(ElementObject * self,PyObject * args)772 element_extend(ElementObject* self, PyObject* args)
773 {
774     PyObject* seq;
775     Py_ssize_t i;
776 
777     PyObject* seq_in;
778     if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
779         return NULL;
780 
781     seq = PySequence_Fast(seq_in, "");
782     if (!seq) {
783         PyErr_Format(
784             PyExc_TypeError,
785             "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
786             );
787         return NULL;
788     }
789 
790     for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
791         PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
792         if (element_add_subelement(self, element) < 0) {
793             Py_DECREF(seq);
794             return NULL;
795         }
796     }
797 
798     Py_DECREF(seq);
799 
800     Py_RETURN_NONE;
801 }
802 
803 static PyObject*
element_find(ElementObject * self,PyObject * args)804 element_find(ElementObject* self, PyObject* args)
805 {
806     int i;
807 
808     PyObject* tag;
809     PyObject* namespaces = Py_None;
810     if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
811         return NULL;
812 
813     if (checkpath(tag) || namespaces != Py_None)
814         return PyObject_CallMethod(
815             elementpath_obj, "find", "OOO", self, tag, namespaces
816             );
817 
818     if (!self->extra)
819         Py_RETURN_NONE;
820 
821     for (i = 0; i < self->extra->length; i++) {
822         PyObject* item = self->extra->children[i];
823         int rc;
824         if (!Element_CheckExact(item))
825             continue;
826         Py_INCREF(item);
827         rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
828         if (rc == 0)
829             return item;
830         Py_DECREF(item);
831         if (rc < 0 && PyErr_Occurred())
832             return NULL;
833     }
834 
835     Py_RETURN_NONE;
836 }
837 
838 static PyObject*
element_findtext(ElementObject * self,PyObject * args)839 element_findtext(ElementObject* self, PyObject* args)
840 {
841     int i;
842 
843     PyObject* tag;
844     PyObject* default_value = Py_None;
845     PyObject* namespaces = Py_None;
846     if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
847         return NULL;
848 
849     if (checkpath(tag) || namespaces != Py_None)
850         return PyObject_CallMethod(
851             elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
852             );
853 
854     if (!self->extra) {
855         Py_INCREF(default_value);
856         return default_value;
857     }
858 
859     for (i = 0; i < self->extra->length; i++) {
860         ElementObject* item = (ElementObject*) self->extra->children[i];
861         int rc;
862         if (!Element_CheckExact(item))
863             continue;
864         Py_INCREF(item);
865         rc = PyObject_Compare(item->tag, tag);
866         if (rc == 0) {
867             PyObject* text = element_get_text(item);
868             if (text == Py_None) {
869                 Py_DECREF(item);
870                 return PyString_FromString("");
871             }
872             Py_XINCREF(text);
873             Py_DECREF(item);
874             return text;
875         }
876         Py_DECREF(item);
877         if (rc < 0 && PyErr_Occurred())
878             return NULL;
879     }
880 
881     Py_INCREF(default_value);
882     return default_value;
883 }
884 
885 static PyObject*
element_findall(ElementObject * self,PyObject * args)886 element_findall(ElementObject* self, PyObject* args)
887 {
888     int i;
889     PyObject* out;
890 
891     PyObject* tag;
892     PyObject* namespaces = Py_None;
893     if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
894         return NULL;
895 
896     if (checkpath(tag) || namespaces != Py_None)
897         return PyObject_CallMethod(
898             elementpath_obj, "findall", "OOO", self, tag, namespaces
899             );
900 
901     out = PyList_New(0);
902     if (!out)
903         return NULL;
904 
905     if (!self->extra)
906         return out;
907 
908     for (i = 0; i < self->extra->length; i++) {
909         PyObject* item = self->extra->children[i];
910         int rc;
911         if (!Element_CheckExact(item))
912             continue;
913         Py_INCREF(item);
914         rc = PyObject_Compare(((ElementObject*)item)->tag, tag);
915         if (rc == 0)
916             rc = PyList_Append(out, item);
917         Py_DECREF(item);
918         if (rc < 0 && PyErr_Occurred()) {
919             Py_DECREF(out);
920             return NULL;
921         }
922     }
923 
924     return out;
925 }
926 
927 static PyObject*
element_iterfind(ElementObject * self,PyObject * args)928 element_iterfind(ElementObject* self, PyObject* args)
929 {
930     PyObject* tag;
931     PyObject* namespaces = Py_None;
932     if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
933         return NULL;
934 
935     return PyObject_CallMethod(
936         elementpath_obj, "iterfind", "OOO", self, tag, namespaces
937         );
938 }
939 
940 static PyObject*
element_get(ElementObject * self,PyObject * args)941 element_get(ElementObject* self, PyObject* args)
942 {
943     PyObject* value;
944 
945     PyObject* key;
946     PyObject* default_value = Py_None;
947     if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
948         return NULL;
949 
950     if (!self->extra || self->extra->attrib == Py_None)
951         value = default_value;
952     else {
953         value = PyDict_GetItem(self->extra->attrib, key);
954         if (!value)
955             value = default_value;
956     }
957 
958     Py_INCREF(value);
959     return value;
960 }
961 
962 static PyObject*
element_getchildren(ElementObject * self,PyObject * args)963 element_getchildren(ElementObject* self, PyObject* args)
964 {
965     int i;
966     PyObject* list;
967 
968     if (PyErr_WarnPy3k("This method will be removed in future versions.  "
969                        "Use 'list(elem)' or iteration over elem instead.",
970                        1) < 0) {
971         return NULL;
972     }
973 
974     if (!PyArg_ParseTuple(args, ":getchildren"))
975         return NULL;
976 
977     if (!self->extra)
978         return PyList_New(0);
979 
980     list = PyList_New(self->extra->length);
981     if (!list)
982         return NULL;
983 
984     for (i = 0; i < self->extra->length; i++) {
985         PyObject* item = self->extra->children[i];
986         Py_INCREF(item);
987         PyList_SET_ITEM(list, i, item);
988     }
989 
990     return list;
991 }
992 
993 static PyObject*
element_iter_impl(ElementObject * self,PyObject * tag)994 element_iter_impl(ElementObject* self, PyObject* tag)
995 {
996     PyObject* args;
997     PyObject* result;
998 
999     if (!elementtree_iter_obj) {
1000         PyErr_SetString(
1001             PyExc_RuntimeError,
1002             "iter helper not found"
1003             );
1004         return NULL;
1005     }
1006 
1007     args = PyTuple_New(2);
1008     if (!args)
1009         return NULL;
1010 
1011     Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1012     Py_INCREF(tag);  PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1013 
1014     result = PyObject_CallObject(elementtree_iter_obj, args);
1015 
1016     Py_DECREF(args);
1017 
1018     return result;
1019 }
1020 
1021 static PyObject*
element_iter(ElementObject * self,PyObject * args)1022 element_iter(ElementObject* self, PyObject* args)
1023 {
1024     PyObject* tag = Py_None;
1025     if (!PyArg_ParseTuple(args, "|O:iter", &tag))
1026         return NULL;
1027 
1028     return element_iter_impl(self, tag);
1029 }
1030 
1031 static PyObject*
element_getiterator(ElementObject * self,PyObject * args)1032 element_getiterator(ElementObject* self, PyObject* args)
1033 {
1034     PyObject* tag = Py_None;
1035     if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
1036         return NULL;
1037 
1038     /* Change for a DeprecationWarning in 1.4 */
1039     if (Py_Py3kWarningFlag &&
1040         PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1041                      "This method will be removed in future versions.  "
1042                      "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1043                      1) < 0) {
1044         return NULL;
1045     }
1046     return element_iter_impl(self, tag);
1047 }
1048 
1049 
1050 static PyObject*
element_itertext(ElementObject * self,PyObject * args)1051 element_itertext(ElementObject* self, PyObject* args)
1052 {
1053     PyObject* result;
1054 
1055     if (!PyArg_ParseTuple(args, ":itertext"))
1056         return NULL;
1057 
1058     if (!elementtree_itertext_obj) {
1059         PyErr_SetString(
1060             PyExc_RuntimeError,
1061             "itertext helper not found"
1062             );
1063         return NULL;
1064     }
1065 
1066     args = PyTuple_New(1);
1067     if (!args)
1068         return NULL;
1069 
1070     Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1071 
1072     result = PyObject_CallObject(elementtree_itertext_obj, args);
1073 
1074     Py_DECREF(args);
1075 
1076     return result;
1077 }
1078 
1079 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1080 element_getitem(PyObject* self_, Py_ssize_t index)
1081 {
1082     ElementObject* self = (ElementObject*) self_;
1083 
1084     if (!self->extra || index < 0 || index >= self->extra->length) {
1085         PyErr_SetString(
1086             PyExc_IndexError,
1087             "child index out of range"
1088             );
1089         return NULL;
1090     }
1091 
1092     Py_INCREF(self->extra->children[index]);
1093     return self->extra->children[index];
1094 }
1095 
1096 static PyObject*
element_insert(ElementObject * self,PyObject * args)1097 element_insert(ElementObject* self, PyObject* args)
1098 {
1099     int i;
1100 
1101     int index;
1102     PyObject* element;
1103     if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1104                           &Element_Type, &element))
1105         return NULL;
1106 
1107     if (!self->extra)
1108         element_new_extra(self, NULL);
1109 
1110     if (index < 0) {
1111         index += self->extra->length;
1112         if (index < 0)
1113             index = 0;
1114     }
1115     if (index > self->extra->length)
1116         index = self->extra->length;
1117 
1118     if (element_resize(self, 1) < 0)
1119         return NULL;
1120 
1121     for (i = self->extra->length; i > index; i--)
1122         self->extra->children[i] = self->extra->children[i-1];
1123 
1124     Py_INCREF(element);
1125     self->extra->children[index] = element;
1126 
1127     self->extra->length++;
1128 
1129     Py_RETURN_NONE;
1130 }
1131 
1132 static PyObject*
element_items(ElementObject * self,PyObject * args)1133 element_items(ElementObject* self, PyObject* args)
1134 {
1135     if (!PyArg_ParseTuple(args, ":items"))
1136         return NULL;
1137 
1138     if (!self->extra || self->extra->attrib == Py_None)
1139         return PyList_New(0);
1140 
1141     return PyDict_Items(self->extra->attrib);
1142 }
1143 
1144 static PyObject*
element_keys(ElementObject * self,PyObject * args)1145 element_keys(ElementObject* self, PyObject* args)
1146 {
1147     if (!PyArg_ParseTuple(args, ":keys"))
1148         return NULL;
1149 
1150     if (!self->extra || self->extra->attrib == Py_None)
1151         return PyList_New(0);
1152 
1153     return PyDict_Keys(self->extra->attrib);
1154 }
1155 
1156 static Py_ssize_t
element_length(ElementObject * self)1157 element_length(ElementObject* self)
1158 {
1159     if (!self->extra)
1160         return 0;
1161 
1162     return self->extra->length;
1163 }
1164 
1165 static PyObject*
element_makeelement(PyObject * self,PyObject * args,PyObject * kw)1166 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1167 {
1168     PyObject* elem;
1169 
1170     PyObject* tag;
1171     PyObject* attrib;
1172     if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1173         return NULL;
1174 
1175     attrib = PyDict_Copy(attrib);
1176     if (!attrib)
1177         return NULL;
1178 
1179     elem = element_new(tag, attrib);
1180 
1181     Py_DECREF(attrib);
1182 
1183     return elem;
1184 }
1185 
1186 static PyObject*
element_reduce(ElementObject * self,PyObject * args)1187 element_reduce(ElementObject* self, PyObject* args)
1188 {
1189     if (!PyArg_ParseTuple(args, ":__reduce__"))
1190         return NULL;
1191 
1192     /* Hack alert: This method is used to work around a __copy__
1193        problem on certain 2.3 and 2.4 versions.  To save time and
1194        simplify the code, we create the copy in here, and use a dummy
1195        copyelement helper to trick the copy module into doing the
1196        right thing. */
1197 
1198     if (!elementtree_copyelement_obj) {
1199         PyErr_SetString(
1200             PyExc_RuntimeError,
1201             "copyelement helper not found"
1202             );
1203         return NULL;
1204     }
1205 
1206     return Py_BuildValue(
1207         "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1208         );
1209 }
1210 
1211 static PyObject*
element_remove(ElementObject * self,PyObject * args)1212 element_remove(ElementObject* self, PyObject* args)
1213 {
1214     int i;
1215     int rc;
1216     PyObject* element;
1217     PyObject* found;
1218 
1219     if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1220         return NULL;
1221 
1222     if (!self->extra) {
1223         /* element has no children, so raise exception */
1224         PyErr_SetString(
1225             PyExc_ValueError,
1226             "list.remove(x): x not in list"
1227             );
1228         return NULL;
1229     }
1230 
1231     for (i = 0; i < self->extra->length; i++) {
1232         if (self->extra->children[i] == element)
1233             break;
1234         rc = PyObject_Compare(self->extra->children[i], element);
1235         if (rc == 0)
1236             break;
1237         if (rc < 0 && PyErr_Occurred())
1238             return NULL;
1239     }
1240 
1241     if (i >= self->extra->length) {
1242         /* element is not in children, so raise exception */
1243         PyErr_SetString(
1244             PyExc_ValueError,
1245             "list.remove(x): x not in list"
1246             );
1247         return NULL;
1248     }
1249 
1250     found = self->extra->children[i];
1251 
1252     self->extra->length--;
1253     for (; i < self->extra->length; i++)
1254         self->extra->children[i] = self->extra->children[i+1];
1255 
1256     Py_DECREF(found);
1257     Py_RETURN_NONE;
1258 }
1259 
1260 static PyObject*
element_repr(ElementObject * self)1261 element_repr(ElementObject* self)
1262 {
1263     int status;
1264 
1265     if (self->tag == NULL)
1266         return PyUnicode_FromFormat("<Element at %p>", self);
1267 
1268     status = Py_ReprEnter((PyObject *)self);
1269     if (status == 0) {
1270         PyObject *repr, *tag;
1271         tag = PyObject_Repr(self->tag);
1272         if (!tag) {
1273             Py_ReprLeave((PyObject *)self);
1274             return NULL;
1275         }
1276 
1277         repr = PyString_FromFormat("<Element %s at %p>",
1278                                    PyString_AS_STRING(tag), self);
1279         Py_ReprLeave((PyObject *)self);
1280         Py_DECREF(tag);
1281         return repr;
1282     }
1283     if (status > 0)
1284         PyErr_Format(PyExc_RuntimeError,
1285                      "reentrant call inside %s.__repr__",
1286                      Py_TYPE(self)->tp_name);
1287     return NULL;
1288 }
1289 
1290 static PyObject*
element_set(ElementObject * self,PyObject * args)1291 element_set(ElementObject* self, PyObject* args)
1292 {
1293     PyObject* attrib;
1294 
1295     PyObject* key;
1296     PyObject* value;
1297     if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1298         return NULL;
1299 
1300     if (!self->extra)
1301         element_new_extra(self, NULL);
1302 
1303     attrib = element_get_attrib(self);
1304     if (!attrib)
1305         return NULL;
1306 
1307     if (PyDict_SetItem(attrib, key, value) < 0)
1308         return NULL;
1309 
1310     Py_RETURN_NONE;
1311 }
1312 
1313 static int
element_setitem(PyObject * self_,Py_ssize_t index_,PyObject * item)1314 element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
1315 {
1316     ElementObject* self = (ElementObject*) self_;
1317     int i, index;
1318     PyObject* old;
1319 
1320     if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
1321         PyErr_SetString(
1322             PyExc_IndexError,
1323             "child assignment index out of range");
1324         return -1;
1325     }
1326     index = (int)index_;
1327 
1328     old = self->extra->children[index];
1329 
1330     if (item) {
1331         Py_INCREF(item);
1332         self->extra->children[index] = item;
1333     } else {
1334         self->extra->length--;
1335         for (i = index; i < self->extra->length; i++)
1336             self->extra->children[i] = self->extra->children[i+1];
1337     }
1338 
1339     Py_DECREF(old);
1340 
1341     return 0;
1342 }
1343 
1344 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1345 element_subscr(PyObject* self_, PyObject* item)
1346 {
1347     ElementObject* self = (ElementObject*) self_;
1348 
1349 #if (PY_VERSION_HEX < 0x02050000)
1350     if (_PyAnyInt_Check(item)) {
1351         long i = PyInt_AsLong(item);
1352 #else
1353     if (PyIndex_Check(item)) {
1354         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1355 #endif
1356 
1357         if (i == -1 && PyErr_Occurred()) {
1358             return NULL;
1359         }
1360         if (i < 0 && self->extra)
1361             i += self->extra->length;
1362         return element_getitem(self_, i);
1363     }
1364     else if (PySlice_Check(item)) {
1365         Py_ssize_t start, stop, step, slicelen, cur, i;
1366         PyObject* list;
1367 
1368         if (!self->extra)
1369             return PyList_New(0);
1370 
1371         if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1372             return NULL;
1373         }
1374         slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1375                                          step);
1376 
1377         if (slicelen <= 0)
1378             return PyList_New(0);
1379         else {
1380             list = PyList_New(slicelen);
1381             if (!list)
1382                 return NULL;
1383 
1384             for (cur = start, i = 0; i < slicelen;
1385                  cur += step, i++) {
1386                 PyObject* item = self->extra->children[cur];
1387                 Py_INCREF(item);
1388                 PyList_SET_ITEM(list, i, item);
1389             }
1390 
1391             return list;
1392         }
1393     }
1394     else {
1395         PyErr_SetString(PyExc_TypeError,
1396                 "element indices must be integers");
1397         return NULL;
1398     }
1399 }
1400 
1401 static int
1402 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1403 {
1404     ElementObject* self = (ElementObject*) self_;
1405 
1406 #if (PY_VERSION_HEX < 0x02050000)
1407     if (_PyAnyInt_Check(item)) {
1408         long i = PyInt_AsLong(item);
1409 #else
1410     if (PyIndex_Check(item)) {
1411         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1412 #endif
1413 
1414         if (i == -1 && PyErr_Occurred()) {
1415             return -1;
1416         }
1417         if (i < 0 && self->extra)
1418             i += self->extra->length;
1419         return element_setitem(self_, i, value);
1420     }
1421     else if (PySlice_Check(item)) {
1422         Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1423 
1424         PyObject* recycle = NULL;
1425         PyObject* seq = NULL;
1426 
1427         if (!self->extra)
1428             element_new_extra(self, NULL);
1429 
1430         if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1431             return -1;
1432         }
1433         slicelen = _PySlice_AdjustIndices(self->extra->length, &start, &stop,
1434                                          step);
1435         assert(slicelen <= self->extra->length);
1436 
1437         if (value == NULL)
1438             newlen = 0;
1439         else {
1440             seq = PySequence_Fast(value, "");
1441             if (!seq) {
1442                 PyErr_Format(
1443                     PyExc_TypeError,
1444                     "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1445                     );
1446                 return -1;
1447             }
1448             newlen = PySequence_Size(seq);
1449         }
1450 
1451         if (step !=  1 && newlen != slicelen)
1452         {
1453             Py_XDECREF(seq);
1454             PyErr_Format(PyExc_ValueError,
1455 #if (PY_VERSION_HEX < 0x02050000)
1456                 "attempt to assign sequence of size %d "
1457                 "to extended slice of size %d",
1458                 (int)newlen, (int)slicelen
1459 #else
1460                 "attempt to assign sequence of size %zd "
1461                 "to extended slice of size %zd",
1462                 newlen, slicelen
1463 #endif
1464                 );
1465             return -1;
1466         }
1467 
1468 
1469         /* Resize before creating the recycle bin, to prevent refleaks. */
1470         if (newlen > slicelen) {
1471             if (element_resize(self, newlen - slicelen) < 0) {
1472                 Py_XDECREF(seq);
1473                 return -1;
1474             }
1475         }
1476         assert(newlen - slicelen <= INT_MAX - self->extra->length);
1477         assert(newlen - slicelen >= -self->extra->length);
1478 
1479         if (slicelen > 0) {
1480             /* to avoid recursive calls to this method (via decref), move
1481                old items to the recycle bin here, and get rid of them when
1482                we're done modifying the element */
1483             recycle = PyList_New(slicelen);
1484             if (!recycle) {
1485                 Py_XDECREF(seq);
1486                 return -1;
1487             }
1488             for (cur = start, i = 0; i < slicelen;
1489                  cur += step, i++)
1490                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1491         }
1492 
1493         if (newlen < slicelen) {
1494             /* delete slice */
1495             for (i = stop; i < self->extra->length; i++)
1496                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1497         } else if (newlen > slicelen) {
1498             /* insert slice */
1499             for (i = self->extra->length-1; i >= stop; i--)
1500                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1501         }
1502 
1503         /* replace the slice */
1504         for (cur = start, i = 0; i < newlen;
1505              cur += step, i++) {
1506             PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1507             Py_INCREF(element);
1508             self->extra->children[cur] = element;
1509         }
1510 
1511         self->extra->length += (int)(newlen - slicelen);
1512 
1513         Py_XDECREF(seq);
1514 
1515         /* discard the recycle bin, and everything in it */
1516         Py_XDECREF(recycle);
1517 
1518         return 0;
1519     }
1520     else {
1521         PyErr_SetString(PyExc_TypeError,
1522                 "element indices must be integers");
1523         return -1;
1524     }
1525 }
1526 
/* Method table for Element objects.  Every entry uses the METH_VARARGS
   calling convention; the table is searched by name from
   element_getattr via Py_FindMethod. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* searching */
    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    /* child list manipulation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},

    {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed. */

    /* NOTE(review): the leading "!" keeps this name from matching a real
       attribute lookup; presumably the init code rewrites the name when
       the hack is enabled -- confirm against the init code. */
    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    {NULL, NULL}
};
1572 
1573 static PyObject*
1574 element_getattr(ElementObject* self, char* name)
1575 {
1576     PyObject* res;
1577 
1578     /* handle common attributes first */
1579     if (strcmp(name, "tag") == 0) {
1580         res = self->tag;
1581         Py_INCREF(res);
1582         return res;
1583     } else if (strcmp(name, "text") == 0) {
1584         res = element_get_text(self);
1585         Py_XINCREF(res);
1586         return res;
1587     }
1588 
1589     /* methods */
1590     res = Py_FindMethod(element_methods, (PyObject*) self, name);
1591     if (res)
1592         return res;
1593 
1594     PyErr_Clear();
1595 
1596     /* less common attributes */
1597     if (strcmp(name, "tail") == 0) {
1598         res = element_get_tail(self);
1599     } else if (strcmp(name, "attrib") == 0) {
1600         if (!self->extra)
1601             element_new_extra(self, NULL);
1602         res = element_get_attrib(self);
1603     } else {
1604         PyErr_SetString(PyExc_AttributeError, name);
1605         return NULL;
1606     }
1607 
1608     if (!res)
1609         return NULL;
1610 
1611     Py_INCREF(res);
1612     return res;
1613 }
1614 
1615 static int
1616 element_setattr(ElementObject* self, const char* name, PyObject* value)
1617 {
1618     if (value == NULL) {
1619         PyErr_SetString(
1620             PyExc_AttributeError,
1621             "can't delete element attributes"
1622             );
1623         return -1;
1624     }
1625 
1626     if (strcmp(name, "tag") == 0) {
1627         Py_INCREF(value);
1628         Py_SETREF(self->tag, value);
1629     } else if (strcmp(name, "text") == 0) {
1630         Py_INCREF(value);
1631         _set_joined_ptr(&self->text, value);
1632     } else if (strcmp(name, "tail") == 0) {
1633         Py_INCREF(value);
1634         _set_joined_ptr(&self->tail, value);
1635     } else if (strcmp(name, "attrib") == 0) {
1636         if (!self->extra)
1637             element_new_extra(self, NULL);
1638         Py_INCREF(value);
1639         Py_SETREF(self->extra->attrib, value);
1640     } else {
1641         PyErr_SetString(PyExc_AttributeError, name);
1642         return -1;
1643     }
1644 
1645     return 0;
1646 }
1647 
/* Sequence protocol for Element objects: length, item access and item
   assignment/deletion by integer index.  Slice handling is left to the
   mapping protocol (element_as_mapping). */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,
    0, /* sq_slice */
    element_setitem,
    0, /* sq_ass_slice */
};
1657 
/* Mapping protocol for Element objects: provides subscripting with
   both integer indexes and slice objects (see element_subscr and
   element_ass_subscr). */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1663 
/* Type object for Element.  Attribute access goes through the
   string-based tp_getattr/tp_setattr handlers; slots after
   tp_as_mapping are left zero-initialized. */
statichere PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
    &element_as_mapping, /* tp_as_mapping */
};
1678 
1679 /* ==================================================================== */
1680 /* the tree builder type */
1681 
/* State of a tree builder: the tree under construction, the element
   stack, pending character data and (optionally) a list of collected
   parse events. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    /* both start out as Py_None (see treebuilder_new) */
    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;

/* forward declaration; the type object is defined after its methods */
staticforward PyTypeObject TreeBuilder_Type;

/* true only for objects whose type is exactly TreeBuilder_Type */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
1707 
1708 /* -------------------------------------------------------------------- */
1709 /* constructor and destructor */
1710 
1711 LOCAL(PyObject*)
1712 treebuilder_new(void)
1713 {
1714     TreeBuilderObject* self;
1715 
1716     self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1717     if (self == NULL)
1718         return NULL;
1719 
1720     self->root = NULL;
1721 
1722     Py_INCREF(Py_None);
1723     self->this = (ElementObject*) Py_None;
1724 
1725     Py_INCREF(Py_None);
1726     self->last = (ElementObject*) Py_None;
1727 
1728     self->data = NULL;
1729 
1730     self->stack = PyList_New(20);
1731     self->index = 0;
1732 
1733     self->events = NULL;
1734     self->start_event_obj = self->end_event_obj = NULL;
1735     self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1736 
1737     ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1738 
1739     return (PyObject*) self;
1740 }
1741 
1742 static PyObject*
1743 treebuilder(PyObject* self_, PyObject* args)
1744 {
1745     if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1746         return NULL;
1747 
1748     return treebuilder_new();
1749 }
1750 
/* Deallocator for tree builder objects: releases every owned reference
   and then frees the object itself. */
static void
treebuilder_dealloc(TreeBuilderObject* self)
{
    /* the event objects, event list, pending data and root may be NULL,
       hence XDECREF */
    Py_XDECREF(self->end_ns_event_obj);
    Py_XDECREF(self->start_ns_event_obj);
    Py_XDECREF(self->end_event_obj);
    Py_XDECREF(self->start_event_obj);
    Py_XDECREF(self->events);
    /* stack, last and this are released unconditionally, so they are
       expected to be non-NULL here */
    Py_DECREF(self->stack);
    Py_XDECREF(self->data);
    Py_DECREF(self->last);
    Py_DECREF(self->this);
    Py_XDECREF(self->root);

    RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");

    PyObject_Del(self);
}
1769 
1770 /* -------------------------------------------------------------------- */
1771 /* helpers for handling of arbitrary element-like objects */
1772 
/* Move the pending data object in *data into the text/tail slot *dest.

   The reference held in *data is transferred to *dest and *data is
   reset to NULL.  The previous content of the slot is released last,
   after both pointers have been updated.
   NOTE(review): JOIN_OBJ/JOIN_SET are defined elsewhere; presumably
   JOIN_SET tags the stored pointer when the data is a list that still
   has to be joined -- confirm against the macro definitions. */
static void
treebuilder_set_element_text_or_tail(PyObject **data, PyObject **dest)
{
    PyObject *tmp = JOIN_OBJ(*dest);
    *dest = JOIN_SET(*data, PyList_CheckExact(*data));
    *data = NULL;
    Py_DECREF(tmp);
}
1781 
1782 LOCAL(void)
1783 treebuilder_flush_data(TreeBuilderObject* self)
1784 {
1785     ElementObject *element = self->last;
1786 
1787     if (self->data) {
1788         if (self->this == element) {
1789             treebuilder_set_element_text_or_tail(
1790                 &self->data,
1791                 &element->text);
1792         }
1793         else {
1794             treebuilder_set_element_text_or_tail(
1795                 &self->data,
1796                 &element->tail);
1797         }
1798     }
1799 }
1800 
1801 LOCAL(int)
1802 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
1803                          PyObject *node)
1804 {
1805     if (action != NULL) {
1806         PyObject *res = PyTuple_Pack(2, action, node);
1807         if (res == NULL)
1808             return -1;
1809         if (PyList_Append(self->events, res) < 0) {
1810             Py_DECREF(res);
1811             return -1;
1812         }
1813         Py_DECREF(res);
1814     }
1815     return 0;
1816 }
1817 
1818 /* -------------------------------------------------------------------- */
1819 /* handlers */
1820 
1821 LOCAL(PyObject*)
1822 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1823                        PyObject* standalone)
1824 {
1825     Py_RETURN_NONE;
1826 }
1827 
1828 LOCAL(PyObject*)
1829 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1830                          PyObject* attrib)
1831 {
1832     PyObject* node;
1833     PyObject* this;
1834 
1835     treebuilder_flush_data(self);
1836 
1837     node = element_new(tag, attrib);
1838     if (!node)
1839         return NULL;
1840 
1841     this = (PyObject*) self->this;
1842 
1843     if (this != Py_None) {
1844         if (element_add_subelement((ElementObject*) this, node) < 0)
1845             goto error;
1846     } else {
1847         if (self->root) {
1848             PyErr_SetString(
1849                 elementtree_parseerror_obj,
1850                 "multiple elements on top level"
1851                 );
1852             goto error;
1853         }
1854         Py_INCREF(node);
1855         self->root = node;
1856     }
1857 
1858     if (self->index < PyList_GET_SIZE(self->stack)) {
1859         if (PyList_SetItem(self->stack, self->index, this) < 0)
1860             goto error;
1861         Py_INCREF(this);
1862     } else {
1863         if (PyList_Append(self->stack, this) < 0)
1864             goto error;
1865     }
1866     self->index++;
1867 
1868     Py_INCREF(node);
1869     Py_SETREF(self->this, (ElementObject*) node);
1870 
1871     Py_INCREF(node);
1872     Py_SETREF(self->last, (ElementObject*) node);
1873 
1874     if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
1875         goto error;
1876 
1877     return node;
1878 
1879   error:
1880     Py_DECREF(node);
1881     return NULL;
1882 }
1883 
1884 LOCAL(PyObject*)
1885 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1886 {
1887     if (!self->data) {
1888         if (self->last == (ElementObject*) Py_None) {
1889             /* ignore calls to data before the first call to start */
1890             Py_RETURN_NONE;
1891         }
1892         /* store the first item as is */
1893         Py_INCREF(data); self->data = data;
1894     } else {
1895         /* more than one item; use a list to collect items */
1896         if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1897             PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1898             /* expat often generates single character data sections; handle
1899                the most common case by resizing the existing string... */
1900             Py_ssize_t size = PyString_GET_SIZE(self->data);
1901             if (_PyString_Resize(&self->data, size + 1) < 0)
1902                 return NULL;
1903             PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1904         } else if (PyList_CheckExact(self->data)) {
1905             if (PyList_Append(self->data, data) < 0)
1906                 return NULL;
1907         } else {
1908             PyObject* list = PyList_New(2);
1909             if (!list)
1910                 return NULL;
1911             PyList_SET_ITEM(list, 0, self->data);
1912             Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1913             self->data = list;
1914         }
1915     }
1916 
1917     Py_RETURN_NONE;
1918 }
1919 
1920 LOCAL(PyObject*)
1921 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1922 {
1923     ElementObject *item;
1924 
1925     treebuilder_flush_data(self);
1926 
1927     if (self->index == 0) {
1928         PyErr_SetString(
1929             PyExc_IndexError,
1930             "pop from empty stack"
1931             );
1932         return NULL;
1933     }
1934 
1935     item = self->last;
1936     self->last = self->this;
1937     self->index--;
1938     self->this = (ElementObject *) PyList_GET_ITEM(self->stack, self->index);
1939     Py_INCREF(self->this);
1940     Py_DECREF(item);
1941 
1942     if (treebuilder_append_event(self, self->end_event_obj, (PyObject*)self->last) < 0)
1943         return NULL;
1944 
1945     Py_INCREF(self->last);
1946     return (PyObject*) self->last;
1947 }
1948 
1949 /* -------------------------------------------------------------------- */
1950 /* methods (in alphabetical order) */
1951 
1952 static PyObject*
1953 treebuilder_data(TreeBuilderObject* self, PyObject* args)
1954 {
1955     PyObject* data;
1956     if (!PyArg_ParseTuple(args, "O:data", &data))
1957         return NULL;
1958 
1959     return treebuilder_handle_data(self, data);
1960 }
1961 
1962 static PyObject*
1963 treebuilder_end(TreeBuilderObject* self, PyObject* args)
1964 {
1965     PyObject* tag;
1966     if (!PyArg_ParseTuple(args, "O:end", &tag))
1967         return NULL;
1968 
1969     return treebuilder_handle_end(self, tag);
1970 }
1971 
1972 LOCAL(PyObject*)
1973 treebuilder_done(TreeBuilderObject* self)
1974 {
1975     PyObject* res;
1976 
1977     /* FIXME: check stack size? */
1978 
1979     if (self->root)
1980         res = self->root;
1981     else
1982         res = Py_None;
1983 
1984     Py_INCREF(res);
1985     return res;
1986 }
1987 
1988 static PyObject*
1989 treebuilder_close(TreeBuilderObject* self, PyObject* args)
1990 {
1991     if (!PyArg_ParseTuple(args, ":close"))
1992         return NULL;
1993 
1994     return treebuilder_done(self);
1995 }
1996 
1997 static PyObject*
1998 treebuilder_start(TreeBuilderObject* self, PyObject* args)
1999 {
2000     PyObject* tag;
2001     PyObject* attrib = Py_None;
2002     if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2003         return NULL;
2004 
2005     return treebuilder_handle_start(self, tag, attrib);
2006 }
2007 
2008 static PyObject*
2009 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
2010 {
2011     PyObject* encoding;
2012     PyObject* standalone;
2013     if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
2014         return NULL;
2015 
2016     return treebuilder_handle_xml(self, encoding, standalone);
2017 }
2018 
/* Method table for TreeBuilder objects; every entry uses the
   METH_VARARGS calling convention.  Searched by name from
   treebuilder_getattr. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL} /* sentinel */
};
2027 
2028 static PyObject*
2029 treebuilder_getattr(TreeBuilderObject* self, char* name)
2030 {
2031     return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
2032 }
2033 
/* Type object for TreeBuilder.  Only the deallocator and the
   string-based attribute getter are provided; the remaining slots are
   left zero-initialized. */
statichere PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)treebuilder_getattr, /* tp_getattr */
};
2042 
2043 /* ==================================================================== */
2044 /* the expat interface */
2045 
2046 #if defined(USE_EXPAT)
2047 
2048 #include "expat.h"
2049 
/* EXPAT(func) names an expat entry point.  With USE_PYEXPAT_CAPI defined
   the call goes through the pyexpat dispatch table (expat_capi);
   xmlparser() refuses to create a parser while that pointer is NULL.
   Otherwise EXPAT(func) expands to the XML_-prefixed function itself. */
#if defined(USE_PYEXPAT_CAPI)
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
#define EXPAT(func) (XML_##func)
#endif
2057 
/* Instance layout of an XMLParser object.  The handle_* members hold the
   attributes of the same name looked up on the target in xmlparser();
   each one is NULL when the target does not provide that attribute. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* expat parser created in xmlparser() */

    PyObject* target; /* object that receives the parse events */
    PyObject* entity; /* dict consulted for otherwise undefined entities */

    PyObject* names; /* dict cache: raw expat name -> universal name */

    PyObject* handle_xml; /* target.xml (not called in this section) */

    PyObject* handle_start; /* target.start */
    PyObject* handle_data; /* target.data */
    PyObject* handle_end; /* target.end */

    PyObject* handle_comment; /* target.comment */
    PyObject* handle_pi; /* target.pi */

    PyObject* handle_close; /* target.close */

} XMLParserObject;

/* the type object itself is defined after the methods */
staticforward PyTypeObject XMLParser_Type;
2082 
2083 /* helpers */
2084 
#if defined(Py_USING_UNICODE)
/* Scan the first `size` bytes of `string`; return 1 as soon as a byte
   with the high bit set is found (i.e. the data is not plain 7-bit
   ASCII), and 0 if no such byte exists. */
LOCAL(int)
checkstring(const char* string, int size)
{
    const char* cursor = string;
    int remaining = size;

    while (remaining-- > 0) {
        if (*cursor++ & 0x80)
            return 1;
    }

    return 0;
}
#endif
2099 
2100 LOCAL(PyObject*)
2101 makestring(const char* string, int size)
2102 {
2103     /* convert a UTF-8 string to either a 7-bit ascii string or a
2104        Unicode string */
2105 
2106 #if defined(Py_USING_UNICODE)
2107     if (checkstring(string, size))
2108         return PyUnicode_DecodeUTF8(string, size, "strict");
2109 #endif
2110 
2111     return PyString_FromStringAndSize(string, size);
2112 }
2113 
2114 LOCAL(PyObject*)
2115 makeuniversal(XMLParserObject* self, const char* string)
2116 {
2117     /* convert a UTF-8 tag/attribute name from the expat parser
2118        to a universal name string */
2119 
2120     int size = strlen(string);
2121     PyObject* key;
2122     PyObject* value;
2123 
2124     /* look the 'raw' name up in the names dictionary */
2125     key = PyString_FromStringAndSize(string, size);
2126     if (!key)
2127         return NULL;
2128 
2129     value = PyDict_GetItem(self->names, key);
2130 
2131     if (value) {
2132         Py_INCREF(value);
2133     } else {
2134         /* new name.  convert to universal name, and decode as
2135            necessary */
2136 
2137         PyObject* tag;
2138         char* p;
2139         int i;
2140 
2141         /* look for namespace separator */
2142         for (i = 0; i < size; i++)
2143             if (string[i] == '}')
2144                 break;
2145         if (i != size) {
2146             /* convert to universal name */
2147             tag = PyString_FromStringAndSize(NULL, size+1);
2148             p = PyString_AS_STRING(tag);
2149             p[0] = '{';
2150             memcpy(p+1, string, size);
2151             size++;
2152         } else {
2153             /* plain name; use key as tag */
2154             Py_INCREF(key);
2155             tag = key;
2156         }
2157 
2158         /* decode universal name */
2159 #if defined(Py_USING_UNICODE)
2160         /* inline makestring, to avoid duplicating the source string if
2161            it's not a utf-8 string */
2162         p = PyString_AS_STRING(tag);
2163         if (checkstring(p, size)) {
2164             value = PyUnicode_DecodeUTF8(p, size, "strict");
2165             Py_DECREF(tag);
2166             if (!value) {
2167                 Py_DECREF(key);
2168                 return NULL;
2169             }
2170         } else
2171 #endif
2172             value = tag; /* use tag as is */
2173 
2174         /* add to names dictionary */
2175         if (PyDict_SetItem(self->names, key, value) < 0) {
2176             Py_DECREF(key);
2177             Py_DECREF(value);
2178             return NULL;
2179         }
2180     }
2181 
2182     Py_DECREF(key);
2183     return value;
2184 }
2185 
2186 static void
2187 expat_set_error(const char* message, int line, int column)
2188 {
2189     PyObject *error;
2190     PyObject *position;
2191     char buffer[256];
2192 
2193     sprintf(buffer, "%s: line %d, column %d", message, line, column);
2194 
2195     error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2196     if (!error)
2197         return;
2198 
2199     /* add position attribute */
2200     position = Py_BuildValue("(ii)", line, column);
2201     if (!position) {
2202         Py_DECREF(error);
2203         return;
2204     }
2205     if (PyObject_SetAttrString(error, "position", position) == -1) {
2206         Py_DECREF(error);
2207         Py_DECREF(position);
2208         return;
2209     }
2210     Py_DECREF(position);
2211 
2212     PyErr_SetObject(elementtree_parseerror_obj, error);
2213     Py_DECREF(error);
2214 }
2215 
2216 /* -------------------------------------------------------------------- */
2217 /* handlers */
2218 
2219 static void
2220 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2221                       int data_len)
2222 {
2223     PyObject* key;
2224     PyObject* value;
2225     PyObject* res;
2226 
2227     if (data_len < 2 || data_in[0] != '&')
2228         return;
2229 
2230     key = makestring(data_in + 1, data_len - 2);
2231     if (!key)
2232         return;
2233 
2234     value = PyDict_GetItem(self->entity, key);
2235 
2236     if (value) {
2237         if (TreeBuilder_CheckExact(self->target))
2238             res = treebuilder_handle_data(
2239                 (TreeBuilderObject*) self->target, value
2240                 );
2241         else if (self->handle_data)
2242             res = PyObject_CallFunction(self->handle_data, "O", value);
2243         else
2244             res = NULL;
2245         Py_XDECREF(res);
2246     } else if (!PyErr_Occurred()) {
2247         /* Report the first error, not the last */
2248         char message[128];
2249         sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2250         expat_set_error(
2251             message,
2252             EXPAT(GetErrorLineNumber)(self->parser),
2253             EXPAT(GetErrorColumnNumber)(self->parser)
2254             );
2255     }
2256 
2257     Py_DECREF(key);
2258 }
2259 
2260 static void
2261 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2262                     const XML_Char **attrib_in)
2263 {
2264     PyObject* res;
2265     PyObject* tag;
2266     PyObject* attrib;
2267     int ok;
2268 
2269     /* tag name */
2270     tag = makeuniversal(self, tag_in);
2271     if (!tag)
2272         return; /* parser will look for errors */
2273 
2274     /* attributes */
2275     if (attrib_in[0]) {
2276         attrib = PyDict_New();
2277         if (!attrib) {
2278             Py_DECREF(tag);
2279             return;
2280         }
2281         while (attrib_in[0] && attrib_in[1]) {
2282             PyObject* key = makeuniversal(self, attrib_in[0]);
2283             PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2284             if (!key || !value) {
2285                 Py_XDECREF(value);
2286                 Py_XDECREF(key);
2287                 Py_DECREF(attrib);
2288                 Py_DECREF(tag);
2289                 return;
2290             }
2291             ok = PyDict_SetItem(attrib, key, value);
2292             Py_DECREF(value);
2293             Py_DECREF(key);
2294             if (ok < 0) {
2295                 Py_DECREF(attrib);
2296                 Py_DECREF(tag);
2297                 return;
2298             }
2299             attrib_in += 2;
2300         }
2301     } else {
2302         Py_INCREF(Py_None);
2303         attrib = Py_None;
2304     }
2305 
2306     if (TreeBuilder_CheckExact(self->target))
2307         /* shortcut */
2308         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2309                                        tag, attrib);
2310     else if (self->handle_start) {
2311         if (attrib == Py_None) {
2312             Py_DECREF(attrib);
2313             attrib = PyDict_New();
2314             if (!attrib) {
2315                 Py_DECREF(tag);
2316                 return;
2317             }
2318         }
2319         res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2320     } else
2321         res = NULL;
2322 
2323     Py_DECREF(tag);
2324     Py_DECREF(attrib);
2325 
2326     Py_XDECREF(res);
2327 }
2328 
2329 static void
2330 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2331                    int data_len)
2332 {
2333     PyObject* data;
2334     PyObject* res;
2335 
2336     data = makestring(data_in, data_len);
2337     if (!data)
2338         return; /* parser will look for errors */
2339 
2340     if (TreeBuilder_CheckExact(self->target))
2341         /* shortcut */
2342         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2343     else if (self->handle_data)
2344         res = PyObject_CallFunction(self->handle_data, "O", data);
2345     else
2346         res = NULL;
2347 
2348     Py_DECREF(data);
2349 
2350     Py_XDECREF(res);
2351 }
2352 
2353 static void
2354 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2355 {
2356     PyObject* tag;
2357     PyObject* res = NULL;
2358 
2359     if (TreeBuilder_CheckExact(self->target))
2360         /* shortcut */
2361         /* the standard tree builder doesn't look at the end tag */
2362         res = treebuilder_handle_end(
2363             (TreeBuilderObject*) self->target, Py_None
2364             );
2365     else if (self->handle_end) {
2366         tag = makeuniversal(self, tag_in);
2367         if (tag) {
2368             res = PyObject_CallFunction(self->handle_end, "O", tag);
2369             Py_DECREF(tag);
2370         }
2371     }
2372 
2373     Py_XDECREF(res);
2374 }
2375 
2376 static void
2377 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2378                        const XML_Char *uri)
2379 {
2380     TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2381     PyObject *parcel;
2382     PyObject *sprefix = NULL;
2383     PyObject *suri = NULL;
2384 
2385     if (PyErr_Occurred())
2386         return;
2387 
2388     if (!target->events || !target->start_ns_event_obj)
2389         return;
2390 
2391     if (uri)
2392         suri = makestring(uri, strlen(uri));
2393     else
2394         suri = PyString_FromStringAndSize("", 0);
2395     if (!suri)
2396         return;
2397 
2398     if (prefix)
2399         sprefix = makestring(prefix, strlen(prefix));
2400     else
2401         sprefix = PyString_FromStringAndSize("", 0);
2402     if (!sprefix) {
2403         Py_DECREF(suri);
2404         return;
2405     }
2406 
2407     parcel = PyTuple_Pack(2, sprefix, suri);
2408     Py_DECREF(sprefix);
2409     Py_DECREF(suri);
2410     if (!parcel)
2411         return;
2412     treebuilder_append_event(target, target->start_ns_event_obj, parcel);
2413     Py_DECREF(parcel);
2414 }
2415 
2416 static void
2417 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2418 {
2419     TreeBuilderObject *target = (TreeBuilderObject*) self->target;
2420 
2421     if (PyErr_Occurred())
2422         return;
2423 
2424     if (!target->events)
2425         return;
2426 
2427     treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
2428 }
2429 
2430 static void
2431 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2432 {
2433     PyObject* comment;
2434     PyObject* res;
2435 
2436     if (self->handle_comment) {
2437         comment = makestring(comment_in, strlen(comment_in));
2438         if (comment) {
2439             res = PyObject_CallFunction(self->handle_comment, "O", comment);
2440             Py_XDECREF(res);
2441             Py_DECREF(comment);
2442         }
2443     }
2444 }
2445 
2446 static void
2447 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2448                  const XML_Char* data_in)
2449 {
2450     PyObject* target;
2451     PyObject* data;
2452     PyObject* res;
2453 
2454     if (self->handle_pi) {
2455         target = makestring(target_in, strlen(target_in));
2456         data = makestring(data_in, strlen(data_in));
2457         if (target && data) {
2458             res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2459             Py_XDECREF(res);
2460             Py_DECREF(data);
2461             Py_DECREF(target);
2462         } else {
2463             Py_XDECREF(data);
2464             Py_XDECREF(target);
2465         }
2466     }
2467 }
2468 
#if defined(Py_USING_UNICODE)
/* Unknown-encoding handler.  Builds expat's byte -> code point table by
   decoding the 256 possible byte values with the Python codec called
   `name`.  Bytes the codec replaces with U+FFFD are marked invalid (-1).
   Codecs that do not yield exactly 256 characters are rejected with
   ValueError.  Returns XML_STATUS_OK or XML_STATUS_ERROR. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char probe[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    /* one byte for every possible value, in order */
    for (code = 0; code < 256; code++)
        probe[code] = code;

    decoded = PyUnicode_Decode((char*) probe, 256, name, "replace");
    if (decoded == NULL)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) == 256) {
        chars = PyUnicode_AS_UNICODE(decoded);

        for (code = 0; code < 256; code++) {
            if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
                info->map[code] = -1;
            else
                info->map[code] = chars[code];
        }

        Py_DECREF(decoded);
        return XML_STATUS_OK;
    }

    Py_DECREF(decoded);
    PyErr_SetString(PyExc_ValueError,
                    "multi-byte encodings are not supported");
    return XML_STATUS_ERROR;
}
#endif
2509 
2510 /* -------------------------------------------------------------------- */
2511 /* constructor and destructor */
2512 
2513 static int
2514 ignore_attribute_error(PyObject *value)
2515 {
2516     if (value == NULL) {
2517         if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
2518             return -1;
2519         }
2520         PyErr_Clear();
2521     }
2522     return 0;
2523 }
2524 
2525 static PyObject*
2526 xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
2527 {
2528     XMLParserObject* self;
2529     /* FIXME: does this need to be static? */
2530     static XML_Memory_Handling_Suite memory_handler;
2531 
2532     PyObject* target = NULL;
2533     char* encoding = NULL;
2534     static char* kwlist[] = { "target", "encoding", NULL };
2535     if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2536                                      &target, &encoding))
2537         return NULL;
2538 
2539 #if defined(USE_PYEXPAT_CAPI)
2540     if (!expat_capi) {
2541         PyErr_SetString(
2542             PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2543             );
2544         return NULL;
2545     }
2546 #endif
2547 
2548     self = PyObject_New(XMLParserObject, &XMLParser_Type);
2549     if (self == NULL)
2550         return NULL;
2551 
2552     self->entity = PyDict_New();
2553     if (!self->entity) {
2554         PyObject_Del(self);
2555         return NULL;
2556     }
2557 
2558     self->names = PyDict_New();
2559     if (!self->names) {
2560         PyObject_Del(self->entity);
2561         PyObject_Del(self);
2562         return NULL;
2563     }
2564 
2565     memory_handler.malloc_fcn = PyObject_Malloc;
2566     memory_handler.realloc_fcn = PyObject_Realloc;
2567     memory_handler.free_fcn = PyObject_Free;
2568 
2569     self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2570     if (!self->parser) {
2571         PyObject_Del(self->names);
2572         PyObject_Del(self->entity);
2573         PyObject_Del(self);
2574         PyErr_NoMemory();
2575         return NULL;
2576     }
2577 
2578     ALLOC(sizeof(XMLParserObject), "create expatparser");
2579 
2580     /* Init to NULL to keep the error handling below manageable. */
2581     self->target =
2582         self->handle_xml =
2583         self->handle_start =
2584         self->handle_data =
2585         self->handle_end =
2586         self->handle_comment =
2587         self->handle_pi =
2588         self->handle_close =
2589         NULL;
2590 
2591     /* setup target handlers */
2592     if (!target) {
2593         target = treebuilder_new();
2594         if (!target) {
2595             Py_DECREF(self);
2596             return NULL;
2597         }
2598     } else
2599         Py_INCREF(target);
2600     self->target = target;
2601 
2602     self->handle_xml = PyObject_GetAttrString(target, "xml");
2603     if (ignore_attribute_error(self->handle_xml)) {
2604         Py_DECREF(self);
2605         return NULL;
2606     }
2607     self->handle_start = PyObject_GetAttrString(target, "start");
2608     if (ignore_attribute_error(self->handle_start)) {
2609         Py_DECREF(self);
2610         return NULL;
2611     }
2612     self->handle_data = PyObject_GetAttrString(target, "data");
2613     if (ignore_attribute_error(self->handle_data)) {
2614         Py_DECREF(self);
2615         return NULL;
2616     }
2617     self->handle_end = PyObject_GetAttrString(target, "end");
2618     if (ignore_attribute_error(self->handle_end)) {
2619         Py_DECREF(self);
2620         return NULL;
2621     }
2622     self->handle_comment = PyObject_GetAttrString(target, "comment");
2623     if (ignore_attribute_error(self->handle_comment)) {
2624         Py_DECREF(self);
2625         return NULL;
2626     }
2627     self->handle_pi = PyObject_GetAttrString(target, "pi");
2628     if (ignore_attribute_error(self->handle_pi)) {
2629         Py_DECREF(self);
2630         return NULL;
2631     }
2632     self->handle_close = PyObject_GetAttrString(target, "close");
2633     if (ignore_attribute_error(self->handle_close)) {
2634         Py_DECREF(self);
2635         return NULL;
2636     }
2637 
2638     /* configure parser */
2639     EXPAT(SetUserData)(self->parser, self);
2640     EXPAT(SetElementHandler)(
2641         self->parser,
2642         (XML_StartElementHandler) expat_start_handler,
2643         (XML_EndElementHandler) expat_end_handler
2644         );
2645     EXPAT(SetDefaultHandlerExpand)(
2646         self->parser,
2647         (XML_DefaultHandler) expat_default_handler
2648         );
2649     EXPAT(SetCharacterDataHandler)(
2650         self->parser,
2651         (XML_CharacterDataHandler) expat_data_handler
2652         );
2653     if (self->handle_comment)
2654         EXPAT(SetCommentHandler)(
2655             self->parser,
2656             (XML_CommentHandler) expat_comment_handler
2657             );
2658     if (self->handle_pi)
2659         EXPAT(SetProcessingInstructionHandler)(
2660             self->parser,
2661             (XML_ProcessingInstructionHandler) expat_pi_handler
2662             );
2663 #if defined(Py_USING_UNICODE)
2664     EXPAT(SetUnknownEncodingHandler)(
2665         self->parser,
2666         (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2667         );
2668 #endif
2669 
2670     return (PyObject*) self;
2671 }
2672 
2673 static void
2674 xmlparser_dealloc(XMLParserObject* self)
2675 {
2676     EXPAT(ParserFree)(self->parser);
2677 
2678     Py_XDECREF(self->handle_close);
2679     Py_XDECREF(self->handle_pi);
2680     Py_XDECREF(self->handle_comment);
2681     Py_XDECREF(self->handle_end);
2682     Py_XDECREF(self->handle_data);
2683     Py_XDECREF(self->handle_start);
2684     Py_XDECREF(self->handle_xml);
2685 
2686     Py_DECREF(self->target);
2687     Py_DECREF(self->entity);
2688     Py_DECREF(self->names);
2689 
2690     RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2691 
2692     PyObject_Del(self);
2693 }
2694 
2695 /* -------------------------------------------------------------------- */
2696 /* methods (in alphabetical order) */
2697 
2698 LOCAL(PyObject*)
2699 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2700 {
2701     int ok;
2702 
2703     ok = EXPAT(Parse)(self->parser, data, data_len, final);
2704 
2705     if (PyErr_Occurred())
2706         return NULL;
2707 
2708     if (!ok) {
2709         expat_set_error(
2710             EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2711             EXPAT(GetErrorLineNumber)(self->parser),
2712             EXPAT(GetErrorColumnNumber)(self->parser)
2713             );
2714         return NULL;
2715     }
2716 
2717     Py_RETURN_NONE;
2718 }
2719 
2720 static PyObject*
2721 xmlparser_close(XMLParserObject* self, PyObject* args)
2722 {
2723     /* end feeding data to parser */
2724 
2725     PyObject* res;
2726     if (!PyArg_ParseTuple(args, ":close"))
2727         return NULL;
2728 
2729     res = expat_parse(self, "", 0, 1);
2730     if (!res)
2731         return NULL;
2732 
2733     if (TreeBuilder_CheckExact(self->target)) {
2734         Py_DECREF(res);
2735         return treebuilder_done((TreeBuilderObject*) self->target);
2736     } if (self->handle_close) {
2737         Py_DECREF(res);
2738         return PyObject_CallFunction(self->handle_close, "");
2739     } else
2740         return res;
2741 }
2742 
2743 static PyObject*
2744 xmlparser_feed(XMLParserObject* self, PyObject* args)
2745 {
2746     /* feed data to parser */
2747 
2748     char* data;
2749     int data_len;
2750     if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2751         return NULL;
2752 
2753     return expat_parse(self, data, data_len, 0);
2754 }
2755 
2756 static PyObject*
2757 xmlparser_parse(XMLParserObject* self, PyObject* args)
2758 {
2759     /* (internal) parse until end of input stream */
2760 
2761     PyObject* reader;
2762     PyObject* buffer;
2763     PyObject* res;
2764 
2765     PyObject* fileobj;
2766     if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2767         return NULL;
2768 
2769     reader = PyObject_GetAttrString(fileobj, "read");
2770     if (!reader)
2771         return NULL;
2772 
2773     /* read from open file object */
2774     for (;;) {
2775 
2776         buffer = PyObject_CallFunction(reader, "i", 64*1024);
2777 
2778         if (!buffer) {
2779             /* read failed (e.g. due to KeyboardInterrupt) */
2780             Py_DECREF(reader);
2781             return NULL;
2782         }
2783 
2784         if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2785             Py_DECREF(buffer);
2786             break;
2787         }
2788 
2789         if (PyString_GET_SIZE(buffer) > INT_MAX) {
2790             Py_DECREF(buffer);
2791             Py_DECREF(reader);
2792             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
2793             return NULL;
2794         }
2795         res = expat_parse(
2796             self, PyString_AS_STRING(buffer), (int)PyString_GET_SIZE(buffer), 0
2797             );
2798 
2799         Py_DECREF(buffer);
2800 
2801         if (!res) {
2802             Py_DECREF(reader);
2803             return NULL;
2804         }
2805         Py_DECREF(res);
2806 
2807     }
2808 
2809     Py_DECREF(reader);
2810 
2811     res = expat_parse(self, "", 0, 1);
2812 
2813     if (res && TreeBuilder_CheckExact(self->target)) {
2814         Py_DECREF(res);
2815         return treebuilder_done((TreeBuilderObject*) self->target);
2816     }
2817 
2818     return res;
2819 }
2820 
2821 static PyObject*
2822 xmlparser_setevents(XMLParserObject* self, PyObject* args)
2823 {
2824     /* activate element event reporting */
2825 
2826     Py_ssize_t i;
2827     TreeBuilderObject* target;
2828 
2829     PyObject* events; /* event collector */
2830     PyObject* event_set = Py_None;
2831     if (!PyArg_ParseTuple(args, "O!|O:_setevents",  &PyList_Type, &events,
2832                           &event_set))
2833         return NULL;
2834 
2835     if (!TreeBuilder_CheckExact(self->target)) {
2836         PyErr_SetString(
2837             PyExc_TypeError,
2838             "event handling only supported for cElementTree.Treebuilder "
2839             "targets"
2840             );
2841         return NULL;
2842     }
2843 
2844     target = (TreeBuilderObject*) self->target;
2845 
2846     Py_INCREF(events);
2847     Py_XSETREF(target->events, events);
2848 
2849     /* clear out existing events */
2850     Py_CLEAR(target->start_event_obj);
2851     Py_CLEAR(target->end_event_obj);
2852     Py_CLEAR(target->start_ns_event_obj);
2853     Py_CLEAR(target->end_ns_event_obj);
2854 
2855     if (event_set == Py_None) {
2856         /* default is "end" only */
2857         target->end_event_obj = PyString_FromString("end");
2858         Py_RETURN_NONE;
2859     }
2860 
2861     if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2862         goto error;
2863 
2864     for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2865         PyObject* item = PyTuple_GET_ITEM(event_set, i);
2866         char* event;
2867         if (!PyString_Check(item))
2868             goto error;
2869         Py_INCREF(item);
2870         event = PyString_AS_STRING(item);
2871         if (strcmp(event, "start") == 0) {
2872             Py_XSETREF(target->start_event_obj, item);
2873         } else if (strcmp(event, "end") == 0) {
2874             Py_XSETREF(target->end_event_obj, item);
2875         } else if (strcmp(event, "start-ns") == 0) {
2876             Py_XSETREF(target->start_ns_event_obj, item);
2877             EXPAT(SetNamespaceDeclHandler)(
2878                 self->parser,
2879                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2880                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2881                 );
2882         } else if (strcmp(event, "end-ns") == 0) {
2883             Py_XSETREF(target->end_ns_event_obj, item);
2884             EXPAT(SetNamespaceDeclHandler)(
2885                 self->parser,
2886                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2887                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2888                 );
2889         } else {
2890             Py_DECREF(item);
2891             PyErr_Format(
2892                 PyExc_ValueError,
2893                 "unknown event '%s'", event
2894                 );
2895             return NULL;
2896         }
2897     }
2898 
2899     Py_RETURN_NONE;
2900 
2901   error:
2902     PyErr_SetString(
2903         PyExc_TypeError,
2904         "invalid event tuple"
2905         );
2906     return NULL;
2907 }
2908 
/* Method table for XMLParser objects, consulted first by
   xmlparser_getattr().  The underscore-prefixed entries are internal
   helpers used by the Python glue code set up in init_elementtree(). */
static PyMethodDef xmlparser_methods[] = {
    {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
    {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
    {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
    {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
    {NULL, NULL} /* sentinel: marks the end of the table */
};
2916 
2917 static PyObject*
2918 xmlparser_getattr(XMLParserObject* self, char* name)
2919 {
2920     PyObject* res;
2921 
2922     res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2923     if (res)
2924         return res;
2925 
2926     PyErr_Clear();
2927 
2928     if (strcmp(name, "entity") == 0)
2929         res = self->entity;
2930     else if (strcmp(name, "target") == 0)
2931         res = self->target;
2932     else if (strcmp(name, "version") == 0) {
2933         char buffer[100];
2934         sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2935                 XML_MINOR_VERSION, XML_MICRO_VERSION);
2936         return PyString_FromString(buffer);
2937     } else {
2938         PyErr_SetString(PyExc_AttributeError, name);
2939         return NULL;
2940     }
2941 
2942     Py_INCREF(res);
2943     return res;
2944 }
2945 
/* Type object for XMLParser instances.  Only the leading slots are
   spelled out; the remaining members are left to C's implicit
   zero-initialization.  The type pointer is NULL here and is patched to
   &PyType_Type in init_elementtree(). */
statichere PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)xmlparser_getattr, /* tp_getattr */
};
2954 
2955 #endif
2956 
2957 /* ==================================================================== */
2958 /* python module interface */
2959 
/* Module-level functions registered by init_elementtree().  The parser
   factory is only available when the module is built with USE_EXPAT;
   "XMLTreeBuilder" is a second name for the same C function as
   "XMLParser". */
static PyMethodDef _functions[] = {
    {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
    {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
    {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
#if defined(USE_EXPAT)
    {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
    {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
#endif
    {NULL, NULL} /* sentinel: marks the end of the table */
};
2970 
2971 DL_EXPORT(void)
2972 init_elementtree(void)
2973 {
2974     PyObject* m;
2975     PyObject* g;
2976     char* bootstrap;
2977 
2978     /* Patch object type */
2979     Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
2980 #if defined(USE_EXPAT)
2981     Py_TYPE(&XMLParser_Type) = &PyType_Type;
2982 #endif
2983 
2984     m = Py_InitModule("_elementtree", _functions);
2985     if (!m)
2986         return;
2987 
2988     /* python glue code */
2989 
2990     g = PyDict_New();
2991     if (!g)
2992         return;
2993 
2994     PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2995 
2996     bootstrap = (
2997 
2998         "from copy import copy, deepcopy\n"
2999 
3000         "try:\n"
3001         "  from xml.etree import ElementTree\n"
3002         "except ImportError:\n"
3003         "  import ElementTree\n"
3004         "ET = ElementTree\n"
3005         "del ElementTree\n"
3006 
3007         "import _elementtree as cElementTree\n"
3008 
3009         "try:\n" /* check if copy works as is */
3010         "  copy(cElementTree.Element('x'))\n"
3011         "except:\n"
3012         "  def copyelement(elem):\n"
3013         "    return elem\n"
3014 
3015         "class CommentProxy:\n"
3016         " def __call__(self, text=None):\n"
3017         "  element = cElementTree.Element(ET.Comment)\n"
3018         "  element.text = text\n"
3019         "  return element\n"
3020         " def __cmp__(self, other):\n"
3021         "  return cmp(ET.Comment, other)\n"
3022         "cElementTree.Comment = CommentProxy()\n"
3023 
3024         "class ElementTree(ET.ElementTree):\n" /* public */
3025         "  def parse(self, source, parser=None):\n"
3026         "    close_source = False\n"
3027         "    if not hasattr(source, 'read'):\n"
3028         "      source = open(source, 'rb')\n"
3029         "      close_source = False\n"
3030         "    try:\n"
3031         "      if parser is not None:\n"
3032         "        while 1:\n"
3033         "          data = source.read(65536)\n"
3034         "          if not data:\n"
3035         "            break\n"
3036         "          parser.feed(data)\n"
3037         "        self._root = parser.close()\n"
3038         "      else:\n"
3039         "        parser = cElementTree.XMLParser()\n"
3040         "        self._root = parser._parse(source)\n"
3041         "      return self._root\n"
3042         "    finally:\n"
3043         "      if close_source:\n"
3044         "        source.close()\n"
3045         "cElementTree.ElementTree = ElementTree\n"
3046 
3047         "def iter(node, tag=None):\n" /* helper */
3048         "  if tag == '*':\n"
3049         "    tag = None\n"
3050         "  if tag is None or node.tag == tag:\n"
3051         "    yield node\n"
3052         "  for node in node:\n"
3053         "    for node in iter(node, tag):\n"
3054         "      yield node\n"
3055 
3056         "def itertext(node):\n" /* helper */
3057         "  if node.text:\n"
3058         "    yield node.text\n"
3059         "  for e in node:\n"
3060         "    for s in e.itertext():\n"
3061         "      yield s\n"
3062         "    if e.tail:\n"
3063         "      yield e.tail\n"
3064 
3065         "def parse(source, parser=None):\n" /* public */
3066         "  tree = ElementTree()\n"
3067         "  tree.parse(source, parser)\n"
3068         "  return tree\n"
3069         "cElementTree.parse = parse\n"
3070 
3071         "class iterparse(object):\n"
3072         " root = None\n"
3073         " def __init__(self, file, events=None):\n"
3074         "  self._close_file = False\n"
3075         "  if not hasattr(file, 'read'):\n"
3076         "    file = open(file, 'rb')\n"
3077         "    self._close_file = True\n"
3078         "  self._file = file\n"
3079         "  self._events = []\n"
3080         "  self._index = 0\n"
3081         "  self._error = None\n"
3082         "  self.root = self._root = None\n"
3083         "  b = cElementTree.TreeBuilder()\n"
3084         "  self._parser = cElementTree.XMLParser(b)\n"
3085         "  self._parser._setevents(self._events, events)\n"
3086         " def next(self):\n"
3087         "  while 1:\n"
3088         "    try:\n"
3089         "      item = self._events[self._index]\n"
3090         "      self._index += 1\n"
3091         "      return item\n"
3092         "    except IndexError:\n"
3093         "      pass\n"
3094         "    if self._error:\n"
3095         "      e = self._error\n"
3096         "      self._error = None\n"
3097         "      raise e\n"
3098         "    if self._parser is None:\n"
3099         "      self.root = self._root\n"
3100         "      if self._close_file:\n"
3101         "        self._file.close()\n"
3102         "      raise StopIteration\n"
3103         "    # load event buffer\n"
3104         "    del self._events[:]\n"
3105         "    self._index = 0\n"
3106         "    data = self._file.read(16384)\n"
3107         "    if data:\n"
3108         "      try:\n"
3109         "        self._parser.feed(data)\n"
3110         "      except SyntaxError as exc:\n"
3111         "        self._error = exc\n"
3112         "    else:\n"
3113         "      self._root = self._parser.close()\n"
3114         "      self._parser = None\n"
3115         " def __iter__(self):\n"
3116         "  return self\n"
3117         "cElementTree.iterparse = iterparse\n"
3118 
3119         "class PIProxy:\n"
3120         " def __call__(self, target, text=None):\n"
3121         "  element = cElementTree.Element(ET.PI)\n"
3122         "  element.text = target\n"
3123         "  if text:\n"
3124         "    element.text = element.text + ' ' + text\n"
3125         "  return element\n"
3126         " def __cmp__(self, other):\n"
3127         "  return cmp(ET.PI, other)\n"
3128         "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
3129 
3130         "def XML(text):\n" /* public */
3131         "  parser = cElementTree.XMLParser()\n"
3132         "  parser.feed(text)\n"
3133         "  return parser.close()\n"
3134         "cElementTree.XML = cElementTree.fromstring = XML\n"
3135 
3136         "def XMLID(text):\n" /* public */
3137         "  tree = XML(text)\n"
3138         "  ids = {}\n"
3139         "  for elem in tree.iter():\n"
3140         "    id = elem.get('id')\n"
3141         "    if id:\n"
3142         "      ids[id] = elem\n"
3143         "  return tree, ids\n"
3144         "cElementTree.XMLID = XMLID\n"
3145 
3146         "try:\n"
3147         " register_namespace = ET.register_namespace\n"
3148         "except AttributeError:\n"
3149         " def register_namespace(prefix, uri):\n"
3150         "  ET._namespace_map[uri] = prefix\n"
3151         "cElementTree.register_namespace = register_namespace\n"
3152 
3153         "cElementTree.dump = ET.dump\n"
3154         "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3155         "cElementTree.iselement = ET.iselement\n"
3156         "cElementTree.QName = ET.QName\n"
3157         "cElementTree.tostring = ET.tostring\n"
3158         "cElementTree.fromstringlist = ET.fromstringlist\n"
3159         "cElementTree.tostringlist = ET.tostringlist\n"
3160         "cElementTree.VERSION = '" VERSION "'\n"
3161         "cElementTree.__version__ = '" VERSION "'\n"
3162 
3163        );
3164 
3165     if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3166         return;
3167 
3168     elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3169 
3170     elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3171     if (elementtree_copyelement_obj) {
3172         /* reduce hack needed; enable reduce method */
3173         PyMethodDef* mp;
3174         for (mp = element_methods; mp->ml_name; mp++)
3175             if (mp->ml_meth == (PyCFunction) element_reduce) {
3176                 mp->ml_name = "__reduce__";
3177                 break;
3178             }
3179     } else
3180         PyErr_Clear();
3181 
3182     elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
3183     elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3184     elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
3185 
3186 #if defined(USE_PYEXPAT_CAPI)
3187     /* link against pyexpat, if possible */
3188     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3189     if (expat_capi) {
3190         /* check that it's usable */
3191         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3192             expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3193             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3194             expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3195             expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3196             expat_capi = NULL;
3197     }
3198 #endif
3199 
3200     elementtree_parseerror_obj = PyErr_NewException(
3201         "cElementTree.ParseError", PyExc_SyntaxError, NULL
3202         );
3203     Py_INCREF(elementtree_parseerror_obj);
3204     PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3205 }
3206