1# xml.etree test.  This file contains enough tests to make sure that
2# all included components work as they should.
3# Large parts are extracted from the upstream test suite.
4
5# IMPORTANT: the same doctests are run from "test_xml_etree_c" in
6# order to ensure consistency between the C implementation and the
7# Python implementation.
8#
9# For this purpose, the module-level "ET" symbol is temporarily
10# monkey-patched when running the "test_xml_etree_c" test suite.
11# Don't re-import "xml.etree.ElementTree" module in the docstring,
12# except if the test is specific to the Python implementation.
13
14import sys
15import cgi
16
17from test import test_support
18from test.test_support import findfile
19
20from xml.etree import ElementTree as ET
21
# Paths of the on-disk XML fixtures, looked up with findfile() in the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# Un-namespaced sample: a <body> with two <tag> children and a <section>
# that holds a third <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> fragment with extra children (<nexttag>,
# <nextsection>); the find() doctests splice it into SAMPLE_XML.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same shape as SAMPLE_XML but inside a default namespace and without
# attributes.  Unlike the literals above, this one starts with a newline
# (there is no backslash after the opening quotes).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""
54
55
def sanity():
    """
    Import sanity: the three xml.etree submodules used by these tests
    must be importable.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """
64
65def check_method(method):
66    if not hasattr(method, '__call__'):
67        print method, "not callable"
68
69def serialize(elem, to_string=True, **options):
70    import StringIO
71    file = StringIO.StringIO()
72    tree = ET.ElementTree(elem)
73    tree.write(file, **options)
74    if to_string:
75        return file.getvalue()
76    else:
77        file.seek(0)
78        return file
79
def summarize(elem):
    """Return a short label for *elem*.

    Comment nodes (whose tag is the ET.Comment factory) are reported as
    "<Comment>"; every other element is reported by its tag.
    """
    tag = elem.tag
    return "<Comment>" if tag == ET.Comment else tag
84
def summarize_list(seq):
    """Return a list holding the summarize() label of each item in *seq*."""
    labels = []
    for item in seq:
        labels.append(summarize(item))
    return labels
87
def normalize_crlf(tree):
    """Replace "\\r\\n" with "\\n" in the text and tail of every node.

    *tree* is anything with an iter() method yielding elements; the nodes
    are modified in place and nothing is returned.
    """
    for node in tree.iter():
        for field in ("text", "tail"):
            value = getattr(node, field)
            # Empty strings and None are left untouched.
            if value:
                setattr(node, field, value.replace("\r\n", "\n"))
94
95def check_string(string):
96    len(string)
97    for char in string:
98        if len(char) != 1:
99            print "expected one-character string, got %r" % char
100    new_string = string + ""
101    new_string = string + " "
102    string[:0]
103
104def check_mapping(mapping):
105    len(mapping)
106    keys = mapping.keys()
107    items = mapping.items()
108    for key in keys:
109        item = mapping[key]
110    mapping["key"] = "value"
111    if mapping["key"] != "value":
112        print "expected value string, got %r" % mapping["key"]
113
114def check_element(element):
115    if not ET.iselement(element):
116        print "not an element"
117    if not hasattr(element, "tag"):
118        print "no tag member"
119    if not hasattr(element, "attrib"):
120        print "no attrib member"
121    if not hasattr(element, "text"):
122        print "no text member"
123    if not hasattr(element, "tail"):
124        print "no tail member"
125
126    check_string(element.tag)
127    check_mapping(element.attrib)
128    if element.text is not None:
129        check_string(element.text)
130    if element.tail is not None:
131        check_string(element.tail)
132    for elem in element:
133        check_element(elem)
134
135# --------------------------------------------------------------------
136# element tree tests
137
def interface():
    r"""
    Test element tree interface.

    The check_* helpers print only when a check fails, so the calls
    below expect no output.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
188
def simpleops():
    """
    Basic method sanity checks.

    Covers append, remove, insert, extend, slice assignment and slice
    deletion, checking the serialized result after each step.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ET.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)

    >>> element = ET.Element("tag", key="value")
    >>> serialize(element) # 1
    '<tag key="value" />'
    >>> subelement = ET.Element("subtag")
    >>> element.append(subelement)
    >>> serialize(element) # 2
    '<tag key="value"><subtag /></tag>'
    >>> element.insert(0, subelement)
    >>> serialize(element) # 3
    '<tag key="value"><subtag /><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 4
    '<tag key="value"><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 5
    '<tag key="value" />'
    >>> element.remove(subelement)
    Traceback (most recent call last):
    ValueError: list.remove(x): x not in list
    >>> serialize(element) # 6
    '<tag key="value" />'
    >>> element[0:0] = [subelement, subelement, subelement]
    >>> serialize(element[1])
    '<subtag />'
    >>> element[1:9] == [element[1], element[2]]
    True
    >>> element[:9:2] == [element[0], element[2]]
    True
    >>> del element[1:2]
    >>> serialize(element)
    '<tag key="value"><subtag /><subtag /></tag>'
    """
244
def cdata():
    """
    Test CDATA handling (etc).

    Plain text, numeric character references and a CDATA section must
    all serialize to the same plain text.

    >>> serialize(ET.XML("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'
    """
256
257# Only with Python implementation
def simplefind():
    """
    Test find methods using the elementpath fallback.

    This test is specific to the Python implementation, so it imports
    ElementTree directly instead of going through the module-level ET.
    ElementTree.ElementPath is swapped for a _SimpleElementPath instance
    for the duration of the test and restored on the last line.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
292
def find():
    """
    Test find methods (including xpath syntax).

    Exercises find, findtext and findall on both Element and ElementTree
    objects, including attribute/child predicates, ".." parent steps and
    namespace-qualified tags.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> elem.find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("/tag").tag
    'tag'
    >>> elem[2] = ET.XML(SAMPLE_SECTION)
    >>> elem.find("section/nexttag").tag
    'nexttag'
    >>> ET.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tog")
    >>> ET.ElementTree(elem).find("tog/foo")
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("section/nexttag")
    ''
    >>> elem.findtext("section/nexttag", "default")
    ''
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("tag")
    'text'
    >>> ET.ElementTree(elem).findtext("tog/foo")
    >>> ET.ElementTree(elem).findtext("tog/foo", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("./tag")
    'text'
    >>> ET.ElementTree(elem).findtext("/tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ET.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("."))
    ['body']
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("tog"))
    []
    >>> summarize_list(elem.findall("tog/foo"))
    []
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("section//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("*//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    Following example is invalid in 1.2.
    A leading '*' is assumed in 1.3.

    >>> elem.findall("section//") == elem.findall("section//*")
    True

    ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """
414
def file_init():
    """
    Test building an ElementTree through the ``file`` argument, first
    from a file-like object (StringIO) and then from a file name.

    >>> import StringIO

    >>> stringfile = StringIO.StringIO(SAMPLE_XML)
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """
432
def bad_find():
    """
    Check bad or unsupported path expressions.

    An absolute path ("/tag") applied directly to an Element must raise
    SyntaxError.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """
442
def path_cache():
    """
    Check that the path cache behaves sanely.

    Repeating the same ten lookups must not grow ET.ElementPath._cache,
    looking up new paths must grow it, and after 600 distinct paths the
    cache must still hold fewer than 500 entries.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
460
def copy():
    """
    Test copy handling (etc).

    A shallow copy shares its child elements with the original (so the
    rename below shows up in it); a deep copy does not.

    >>> import copy
    >>> e1 = ET.XML("<tag>hello<foo/></tag>")
    >>> e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"
    >>> serialize(e1)
    '<tag>hello<bar /></tag>'
    >>> serialize(e2)
    '<tag>hello<bar /></tag>'
    >>> serialize(e3)
    '<tag>hello<foo /></tag>'

    """
478
def attrib():
    """
    Test attribute handling.

    Covers get/set, construction from a dict and/or keyword arguments
    (the dict passed in is not aliased, and a keyword overrides the same
    key in the dict), keys/items, and direct mutation of elem.attrib.

    >>> elem = ET.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ET.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", **attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    >>> elem = ET.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 5.1
    'value'
    >>> elem.attrib # 5.2
    {'key': 'value'}

    >>> elem = ET.Element('test')
    >>> elem.text = "aa"
    >>> elem.set('testa', 'testval')
    >>> elem.set('testb', 'test2')
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test2">aa</test>'
    >>> sorted(elem.keys())
    ['testa', 'testb']
    >>> sorted(elem.items())
    [('testa', 'testval'), ('testb', 'test2')]
    >>> elem.attrib['testb']
    'test2'
    >>> elem.attrib['testb'] = 'test1'
    >>> elem.attrib['testc'] = 'test2'
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test1" testc="test2">aa</test>'
    """
536
def makeelement():
    """
    Test makeelement handling.

    Checks that makeelement() does not alias the attrib dict it is given
    (the first check prints only on failure), then exercises clear,
    append, extend and full-slice assignment from a list and a tuple.

    >>> elem = ET.Element("tag")
    >>> attrib = {"key": "value"}
    >>> subelem = elem.makeelement("subtag", attrib)
    >>> if subelem.attrib is attrib:
    ...     print "attrib aliasing"
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag />'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem.extend([subelem, subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
    >>> elem[:] = [subelem]
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem[:] = tuple([subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    """
567
def parsefile():
    """
    Test parsing from file.

    Parses the fixture files with ET.parse, then feeds the same data to
    XMLParser, to the 1.2-compatible XMLTreeBuilder, and to XMLParser
    with an explicit TreeBuilder target.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version  # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
622
def parseliteral():
    """
    Test parsing from string literals (XML, fromstring, fromstringlist,
    XMLID) and converting back with tostring/tostringlist.

    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> print ET.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ET.tostringlist(element))
    <html><body>text</body></html>
    >>> ET.tostring(element, "ascii")
    "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
648
def iterparse():
    """
    Test iterparse interface.

    Covers the default events (only "end" is reported), empty and
    explicit event tuples, namespace events, an unknown event name, and
    a parse error raised while iterating.

    >>> iterparse = ET.iterparse

    >>> context = iterparse(SIMPLE_XMLFILE)
    >>> action, elem = next(context)
    >>> print action, elem.tag
    end element
    >>> for action, elem in context:
    ...   print action, elem.tag
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse(SIMPLE_NS_XMLFILE)
    >>> for action, elem in context:
    ...   print action, elem.tag
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events=events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ("start", "end")
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
    >>> for action, elem in context:
    ...   if action in ("start", "end"):
    ...     print action, elem.tag
    ...   else:
    ...     print action, elem
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    >>> events = ("start", "end", "bogus")
    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     iterparse(f, events)
    Traceback (most recent call last):
    ValueError: unknown event 'bogus'

    >>> import StringIO

    >>> source = StringIO.StringIO(
    ...     "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
    ...     "<body xmlns='http://&#233;ffbot.org/ns'\\n"
    ...     "      xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
    >>> events = ("start-ns",)
    >>> context = iterparse(source, events)
    >>> for action, elem in context:
    ...     print action, elem
    start-ns ('', u'http://\\xe9ffbot.org/ns')
    start-ns (u'cl\\xe9', 'http://effbot.org/ns')

    >>> source = StringIO.StringIO("<document />junk")
    >>> try:
    ...   for action, elem in iterparse(source):
    ...     print action, elem.tag
    ... except ET.ParseError, v:
    ...   print v
    junk after document element: line 1, column 12
    """
743
def writefile():
    """
    Test writing elements: text, subelements, a suppressed tag
    (tag set to None), comments and processing instructions.

    >>> elem = ET.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ET.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'

    Test tag suppression
    >>> elem.tag = None
    >>> serialize(elem)
    'text<subtag>subtext</subtag>'
    >>> elem.insert(0, ET.Comment("comment"))
    >>> serialize(elem)     # assumes 1.3
    'text<!--comment--><subtag>subtext</subtag>'
    >>> elem[0] = ET.PI("key", "value")
    >>> serialize(elem)
    'text<?key value?><subtag>subtext</subtag>'
    """
765
def custom_builder():
    """
    Test parser w. custom builder.

    The first Builder defines only start/end/data; the second one also
    defines pi and comment callbacks, which are reported for the
    namespaced fixture file.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
819
def getchildren():
    """
    Test Element.getchildren()

    Also checks that slicing, slice deletion, slice assignment and
    clear() stay consistent with what getchildren() reports.

    >>> with open(SIMPLE_XMLFILE, "r") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """
866
def writestring():
    """
    Test that ET.tostring() round-trips a document parsed with ET.XML()
    or ET.fromstring().

    >>> elem = ET.XML("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    >>> elem = ET.fromstring("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    """
876
def check_encoding(encoding):
    """
    Parse a minimal document whose XML declaration names *encoding*.

    Nothing is returned; any exception raised by ET.XML() propagates.

    >>> check_encoding("ascii")
    >>> check_encoding("us-ascii")
    >>> check_encoding("iso-8859-1")
    >>> check_encoding("iso-8859-15")
    >>> check_encoding("cp437")
    >>> check_encoding("mac-roman")
    """
    document = "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    ET.XML(document)
887
def encoding():
    r"""
    Test encoding issues.

    Serializes text and attribute values that contain markup characters
    and non-ASCII characters, using the default output and the utf-8,
    us-ascii and iso-8859-1 encodings.

    >>> elem = ET.Element("tag")
    >>> elem.text = u"abc"
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii") # cdata characters
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'

    >>> elem.text = u'\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
    """
946
def methods():
    r"""
    Test serialization methods.

    Serializes the same element with the default method, method=None,
    "xml", "html" and "text" and compares the outputs.

    >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    >>> e.tail = "\n"
    >>> serialize(e)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method=None)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="xml")
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="html")
    '<html><link><script>1 < 2</script></html>\n'
    >>> serialize(e, method="text")
    '1 < 2\n'
    """
964
def iterators():
    """
    Test iterators.

    Covers Element.iter(), Element.itertext(), the iterator returned by
    ET.iterparse(), and iter() on an ElementTree that has no root.

    >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    >>> summarize_list(e.iter())
    ['html', 'body', 'i']
    >>> summarize_list(e.find("body").iter())
    ['body', 'i']
    >>> summarize(next(e.iter()))
    'html'
    >>> "".join(e.itertext())
    'this is a paragraph...'
    >>> "".join(e.find("body").itertext())
    'this is a paragraph.'
    >>> next(e.itertext())
    'this is a '

    Method iterparse should return an iterator. See bug 6472.

    >>> sourcefile = serialize(e, to_string=False)
    >>> next(ET.iterparse(sourcefile))  # doctest: +ELLIPSIS
    ('end', <Element 'i' at 0x...>)

    >>> tree = ET.ElementTree(None)
    >>> tree.iter()
    Traceback (most recent call last):
    AttributeError: 'NoneType' object has no attribute 'iter'
    """
994
# Document whose internal DTD subset pulls in an external parameter entity
# ('user-entities.xml') and whose body uses the general entity &entity;,
# which the document itself never declares.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
1002
def entity():
    """
    Test entity handling.

    The entity used by ENTITY_XML is rejected as undefined by ET.XML(),
    but resolves once it is registered in the parser's ``entity``
    mapping.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'
    """
1032
def error(xml):
    """
    Parse *xml* and return the ParseError it raises.

    Returns None when *xml* parses without error.  The exception is
    bound with "except ... as" instead of being read back from the
    deprecated, process-global sys.exc_value.

    Test error handling.

    >>> issubclass(ET.ParseError, SyntaxError)
    True
    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError as exc:
        return exc
    # Well-formed input: there is no error to report.
    return None
1052
def namespace():
    """
    Test namespace issues.

    Each case parses a document and checks which prefix the serializer
    writes for the namespace.

    1) xml namespace

    >>> elem = ET.XML("<tag xml:lang='en' />")
    >>> serialize(elem) # 1.1
    '<tag xml:lang="en" />'

    2) other "well-known" namespaces

    Cases 2.1 and 2.2 come back with the rdf and html prefixes; the URI
    in case 2.3 is written with a generated ns0 prefix instead.

    >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    >>> serialize(elem) # 2.1
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'

    >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    >>> serialize(elem) # 2.2
    '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'

    >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    >>> serialize(elem) # 2.3
    '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'

    3) unknown namespaces
    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> print serialize(elem)
    <ns0:body xmlns:ns0="http://effbot.org/ns">
      <ns0:tag>text</ns0:tag>
      <ns0:tag />
      <ns0:section>
        <ns0:tag>subtext</ns0:tag>
      </ns0:section>
    </ns0:body>
    """
1088
def qname():
    """
    Test QName handling.

    1) decorated tags

    A "{uri}tag" string, a QName built from that string and a QName
    built from separate uri and tag arguments all serialize the same.

    >>> elem = ET.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("{uri}tag"))
    >>> serialize(elem) # 1.2
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> serialize(elem) # 1.3
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
    >>> serialize(elem) # 1.4
    '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'

    2) decorated attributes

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "value"
    >>> serialize(elem) # 2.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    >>> elem.clear()
    >>> elem.attrib[ET.QName("{uri}key")] = "value"
    >>> serialize(elem) # 2.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    3) decorated values are not converted by default, but the
       QName wrapper can be used for values

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "{uri}value"
    >>> serialize(elem) # 3.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
    >>> serialize(elem) # 3.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'

    In case 3.3 the same subelement is appended twice, and all three
    namespace declarations end up on the outermost element.

    >>> elem.clear()
    >>> subelem = ET.Element("tag")
    >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    >>> elem.append(subelem)
    >>> elem.append(subelem)
    >>> serialize(elem) # 3.3
    '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'

    4) Direct QName tests

    String conversion yields the "{ns}tag" form, and a QName compares
    equal to another QName or to a plain string in that form.

    >>> str(ET.QName('ns', 'tag'))
    '{ns}tag'
    >>> str(ET.QName('{ns}tag'))
    '{ns}tag'
    >>> q1 = ET.QName('ns', 'tag')
    >>> q2 = ET.QName('ns', 'tag')
    >>> q1 == q2
    True
    >>> q2 = ET.QName('ns', 'other-tag')
    >>> q1 == q2
    False
    >>> q1 == 'ns:tag'
    False
    >>> q1 == '{ns}tag'
    True
    """
1161
def doctype_public():
    """
    Test PUBLIC doctype.

    Parsing a document that carries a PUBLIC doctype declaration is
    expected to produce no output; the parsed element is not inspected.

    >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
    ...   ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ...   ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    ...   '<html>text</html>')

    """
1172
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Returns the tokens that ElementPath.xpath_tokenizer() yields for the
    path *p* as a flat list: for every (op, tag) pair the op is kept when
    it is non-empty, otherwise the tag is kept.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    from xml.etree import ElementPath
    # Collapse each (op, tag) pair into whichever half is non-empty.
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
1226
def processinginstruction():
    """
    Test ProcessingInstruction directly

    ET.PI is expected to serialize exactly like
    ET.ProcessingInstruction.

    >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
    '<?test instruction?>'
    >>> ET.tostring(ET.PI('test', 'instruction'))
    '<?test instruction?>'

    Issue #2746

    The instruction text is written out with its '<' and '&' left
    unescaped, also when an explicit encoding is requested.

    >>> ET.tostring(ET.PI('test', '<testing&>'))
    '<?test <testing&>?>'
    >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
    "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
    """
1243
1244#
1245# xinclude tests (samples from appendix C of the xinclude specification)
1246
# In-memory resources for the xinclude tests, keyed by the href that
# xinclude_loader() is asked for.
XINCLUDE = {}

# C.1: plain XML inclusion of disclaimer.xml.
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
"""

XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
"""

# C.2: textual inclusion of count.txt inside a paragraph.
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["count.txt"] = "324387"

# Variant of C.2 in which the include follows a sibling <em> element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# C.3: an XML resource (data.xml) included as text.
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested xi:fallback elements; neither example.txt nor
# fallback-example.txt is registered in this mapping.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
"""

# The href here is the path of SIMPLE_XMLFILE, escaped (quotes included)
# so that it can sit inside the attribute value.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True))
1319
def xinclude_loader(href, parse="xml", encoding=None):
    """Serve the resource registered under *href* in XINCLUDE.

    Raises IOError("resource not found") when *href* is not registered.
    With parse == "xml" the stored text is parsed and the resulting
    element is returned; for any other *parse* value the stored text is
    returned unchanged.  *encoding* is accepted but not used.
    """
    if href not in XINCLUDE:
        raise IOError("resource not found")
    payload = XINCLUDE[href]
    if parse != "xml":
        return payload
    from xml.etree.ElementTree import XML
    return XML(payload)
1329
def xinclude():
    r"""
    Test ElementInclude.include() with xinclude_loader, which serves the
    sample documents stored in XINCLUDE.

    Basic inclusion example (XInclude C.1)

    >>> from xml.etree import ElementTree as ET
    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("C1.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C1
    <document>
      <p>120 Mz is adequate for an average home user.</p>
      <disclaimer>
      <p>The opinions represented herein represent those of the individual
      and should not be interpreted as official policy endorsed by this
      organization.</p>
    </disclaimer>
    </document>

    Textual inclusion example (XInclude C.2)

    >>> document = xinclude_loader("C2.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C2
    <document>
      <p>This document has been accessed
      324387 times.</p>
    </document>

    Textual inclusion after sibling element (based on modified XInclude C.2)

    >>> document = xinclude_loader("C2b.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print(serialize(document)) # C2b
    <document>
      <p>This document has been <em>accessed</em>
      324387 times.</p>
    </document>

    Textual inclusion of XML example (XInclude C.3)

    The included XML source shows up escaped, as text of <example>.

    >>> document = xinclude_loader("C3.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C3
    <document>
      <p>The following is the source of the "data.xml" resource:</p>
      <example>&lt;?xml version='1.0'?&gt;
    &lt;data&gt;
      &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
    &lt;/data&gt;
    </example>
    </document>

    Fallback example (XInclude C.5)
    Note! Fallback support is not yet implemented

    The expected result is therefore the loader's own IOError for the
    resource it does not know.

    >>> document = xinclude_loader("C5.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    Traceback (most recent call last):
    IOError: resource not found
    >>> # print serialize(document) # C5
    """
1392
def xinclude_default():
    """
    Test inclusion without an explicit loader.

    ElementInclude.include() is called with the document only; the href
    in "default.xml" is the path of SIMPLE_XMLFILE.

    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("default.xml")
    >>> ElementInclude.include(document)
    >>> print serialize(document) # default
    <document>
      <p>Example.</p>
      <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    </document>
    """
1409
1410#
1411# badly formatted xi:include tags
1412
# Malformed XInclude documents used by xinclude_failures().
XINCLUDE_BAD = {}

# xi:include with a parse attribute that is neither "xml" nor "text".
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback placed directly under <div> instead of inside xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""
1429
def xinclude_failures():
    r"""
    Test the FatalIncludeError cases of ElementInclude.include().

    All cases use a loader that returns None for every resource.

    Test failure to locate included XML file.

    >>> from xml.etree import ElementInclude

    >>> def none_loader(href, parser, encoding=None):
    ...     return None

    >>> document = ET.XML(XINCLUDE["C1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'

    Test failure to locate included text file.

    >>> document = ET.XML(XINCLUDE["C2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'count.txt' as 'text'

    Test bad parse type.

    >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')

    Test xi:fallback outside xi:include.

    >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
    """
1465
1466# --------------------------------------------------------------------
1467# reported bugs
1468
def bug_xmltoolkit21():
    """
    The marshaller gave obscure errors for non-string values.

    A non-string tag, text, tail, attribute key and attribute value are
    each expected to make serialization fail with the same TypeError,
    which names the offending value and its type.

    >>> elem = ET.Element(123)
    >>> serialize(elem) # tag
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.text = 123
    >>> serialize(elem) # text
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.tail = 123
    >>> serialize(elem) # tail
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set(123, "123")
    >>> serialize(elem) # attribute key
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set("123", 123)
    >>> serialize(elem) # attribute value
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)

    """
1500
def bug_xmltoolkit25():
    """
    There was a typo in ElementTree.findtext.

    findtext() is called on an ElementTree wrapping SAMPLE_XML, once
    for a direct child and once for a nested path.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> tree = ET.ElementTree(elem)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'

    """
1514
def bug_xmltoolkit28():
    """
    Searching with .//tag caused exceptions.

    A descendant search is run for a tag that is absent and for one
    that is present.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']

    """
1527
def bug_xmltoolkitX1():
    """
    dump() didn't flush the output buffer.

    Text written to sys.stdout right after ET.dump() must show up after
    the dumped element, on a line of its own.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> ET.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail

    """
1539
def bug_xmltoolkit39():
    """
    Non-ASCII element and attribute names didn't work.

    The names come either from parsed iso-8859-1 documents or from
    unicode strings passed to the API; every case is serialized to
    utf-8.

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ET.Element(u"t\u00e4g")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.Element("tag")
    >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    """
1569
def bug_xmltoolkit54():
    """
    There were problems handling internally defined entities.

    An entity declared in the document's own doctype is expanded, and
    its value is serialized as a character reference.

    >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'

    """
1580
def bug_xmltoolkit55():
    """
    Make sure we're reporting the first error, not the last.

    The document refers to three undefined entities; the ParseError is
    expected to name the first of them.

    >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36

    """
1591
class ExceptionFile:
    """File-like stand-in whose read() always fails with IOError."""

    def read(self, x):
        # The requested size is ignored; every read attempt fails.
        raise IOError()
1595
def xmltoolkit60():
    """
    Handle crash in stream source.

    ExceptionFile.read() raises IOError; ET.parse() is expected to let
    that exception through to the caller.

    >>> tree = ET.parse(ExceptionFile())
    Traceback (most recent call last):
    IOError

    """
1605
# Input for xmltoolkit62(): the paragraph refers to &lsquo; and &rsquo;,
# while the doctype only names an external DTD and has an empty internal
# subset.
XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>"""
1613
1614
def xmltoolkit62():
    """
    Don't crash when using custom entities.

    Parses XMLTOOLKIT62_DOC with lsquo and rsquo registered on the
    parser and returns the text of the paragraph element.

    >>> xmltoolkit62()
    u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'

    """
    parser = ET.XMLTreeBuilder()
    # Register replacement text for the two entities the document uses.
    parser.entity.update({u'rsquo': u'\u2019', u'lsquo': u'\u2018'})
    parser.feed(XMLTOOLKIT62_DOC)
    paragraph = parser.close().find('.//paragraph')
    return paragraph.text
1630
def xmltoolkit63():
    """
    Check reference leak.

    Each call drives a fresh TreeBuilder through one start/data/end
    sequence; the doctest repeats the call and compares the reference
    count of None before and after.

    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    builder = ET.TreeBuilder()
    builder.start("tag", {})
    builder.data("text")
    builder.end("tag")
1647
1648# --------------------------------------------------------------------
1649
1650
def bug_200708_newline():
    r"""
    Preserve newlines in attributes.

    Newlines in an attribute value are written as &#10; references, so
    the value survives a serialize/parse round trip unchanged.

    >>> e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n  return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'

    """
1665
def bug_200708_close():
    """
    Check what XMLParser.close() returns.

    Test default builder.
    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.

    The target below defines nothing but close(); the parser's close()
    is expected to hand back the element that it returns.

    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
1685
def bug_200709_default_namespace():
    """
    Test serialization with the default_namespace option.

    Case 1: every element is in the default namespace.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    Case 2: an element from another namespace gets a generated prefix.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    Case 3: an element without any namespace is rejected.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """
1708
def bug_200709_register_namespace():
    """
    Test ET.register_namespace().

    The same element is serialized before and after a prefix has been
    registered for its namespace URI.

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'

    """
1724
def bug_200709_element_comment():
    """
    Check the tag of comment and processing instruction nodes.

    Not sure if this can be fixed, really (since the serializer needs
    ET.Comment, not cET.comment).

    The tag of an appended comment must compare equal to ET.Comment,
    and the tag of an appended processing instruction to ET.PI.

    >>> a = ET.Element('a')
    >>> a.append(ET.Comment('foo'))
    >>> a[0].tag == ET.Comment
    True

    >>> a = ET.Element('a')
    >>> a.append(ET.PI('foo'))
    >>> a[0].tag == ET.PI
    True

    """
1742
def bug_200709_element_insert():
    """
    Test Element.insert() with index 0 and with a negative index.

    Inserting at -1 places the new child before the last one.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']

    """
1758
def bug_200709_iter_comment():
    """
    Test iter() with ET.Comment as the tag filter.

    The comment sits below a subelement and must still be found when
    iterating from the root.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> comment_b = ET.Comment("TEST-b")
    >>> b.append(comment_b)
    >>> summarize_list(a.iter(ET.Comment))
    ['<Comment>']

    """
1770
1771# --------------------------------------------------------------------
1772# reported on bugs.python.org
1773
def bug_1534630():
    """
    Feed data to a TreeBuilder before any element has been started.

    The early data does not show up in the serialized result.

    >>> bob = ET.TreeBuilder()
    >>> e = bob.data("data")
    >>> e = bob.start("tag", {})
    >>> e = bob.end("tag")
    >>> e = bob.close()
    >>> serialize(e)
    '<tag />'

    """
1786
def check_issue6233():
    """
    Serialize non-ASCII text to ascii.

    The same character, read from a utf-8 document and from an
    iso-8859-1 document, must come out as the same character reference.

    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"

    """
1798
def check_issue3151():
    """
    Handle a namespace URI that itself contains braces.

    The URI is kept intact in the element tag and in the serialized
    namespace declaration.

    >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    >>> e.tag
    '{${stuff}}localname'
    >>> t = ET.ElementTree(e)
    >>> ET.tostring(e)
    '<ns0:localname xmlns:ns0="${stuff}" />'

    """
1810
def check_issue6565():
    """
    Replace all children of an element by slice assignment.

    After the assignment the element lists the children taken from the
    SAMPLE_XML root.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> summarize_list(elem)
    ['tag']
    >>> newelem = ET.XML(SAMPLE_XML)
    >>> elem[:] = newelem[:]
    >>> summarize_list(elem)
    ['tag', 'tag', 'section']

    """
1823
1824# --------------------------------------------------------------------
1825
1826
class CleanContext(object):
    """Provide default namespace mapping and path cache.

    On entry the ElementTree namespace map and the ElementPath cache are
    replaced by copies of themselves; on exit the saved originals are put
    back.  The warnings checker built in __init__ is entered and exited
    alongside.
    """
    checkwarnings = None

    def __init__(self, quiet=False):
        if sys.flags.optimize >= 2:
            # under -OO, doctests cannot be run and therefore not all warnings
            # will be emitted
            quiet = True
        # Search behaviour is broken if search path starts with "/".
        broken_search = (
            "This search is broken in 1.3 and earlier, and will be fixed "
            "in a future version.  If you rely on the current behaviour, "
            "change it to '.+'")
        # Element.getchildren() and Element.getiterator() are deprecated.
        method_removed = (
            "This method will be removed in future versions.  "
            "Use .+ instead.")
        # XMLParser.doctype() is deprecated.
        doctype_deprecated = (
            "This method of XMLParser is deprecated.  Define doctype.. "
            "method on the TreeBuilder target.")
        expected = [
            (broken_search, FutureWarning),
            (method_removed, DeprecationWarning),
            (method_removed, PendingDeprecationWarning),
            (doctype_deprecated, DeprecationWarning),
        ]
        self.checkwarnings = test_support.check_warnings(
            *expected, quiet=quiet)

    def __enter__(self):
        from xml.etree import ElementTree
        path_module = ElementTree.ElementPath
        # Remember the live objects so that __exit__ can reinstate them.
        self._nsmap = ElementTree._namespace_map
        self._path_cache = path_module._cache
        # Work on copies of the namespace mapping and of the path cache
        # (the latter should be empty).
        ElementTree._namespace_map = self._nsmap.copy()
        path_module._cache = self._path_cache.copy()
        self.checkwarnings.__enter__()

    def __exit__(self, *exc_info):
        from xml.etree import ElementTree
        # Put the saved mapping and path cache back.
        ElementTree._namespace_map = self._nsmap
        ElementTree.ElementPath._cache = self._path_cache
        self.checkwarnings.__exit__(*exc_info)
1868
1869
def test_main(module_name='xml.etree.ElementTree'):
    """Run the doctests of test_xml_etree inside a CleanContext.

    *module_name* is the name the module-level ET symbol of
    test_xml_etree must carry both before and after the run.  Warning
    checks are quiet for any name other than 'xml.etree.ElementTree'.
    """
    from test import test_xml_etree

    # The same doctests are used for both the Python and the C implementations
    assert test_xml_etree.ET.__name__ == module_name

    # XXX the C module should give the same warnings as the Python module
    suppress_warnings = (module_name != 'xml.etree.ElementTree')
    with CleanContext(quiet=suppress_warnings):
        test_support.run_doctest(test_xml_etree, verbosity=True)

    # The module should not be changed by the tests
    assert test_xml_etree.ET.__name__ == module_name
1884
# Run the doctests with the default module name when executed as a script.
if __name__ == '__main__':
    test_main()
1887