1# xml.etree test. This file contains enough tests to make sure that 2# all included components work as they should. 3# Large parts are extracted from the upstream test suite. 4 5# IMPORTANT: the same doctests are run from "test_xml_etree_c" in 6# order to ensure consistency between the C implementation and the 7# Python implementation. 8# 9# For this purpose, the module-level "ET" symbol is temporarily 10# monkey-patched when running the "test_xml_etree_c" test suite. 11# Don't re-import "xml.etree.ElementTree" module in the docstring, 12# except if the test is specific to the Python implementation. 13 14import sys 15import cgi 16 17from test import test_support 18from test.test_support import findfile 19 20from xml.etree import ElementTree as ET 21 22SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 23SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 24 25SAMPLE_XML = """\ 26<body> 27 <tag class='a'>text</tag> 28 <tag class='b' /> 29 <section> 30 <tag class='b' id='inner'>subtext</tag> 31 </section> 32</body> 33""" 34 35SAMPLE_SECTION = """\ 36<section> 37 <tag class='b' id='inner'>subtext</tag> 38 <nexttag /> 39 <nextsection> 40 <tag /> 41 </nextsection> 42</section> 43""" 44 45SAMPLE_XML_NS = """ 46<body xmlns="http://effbot.org/ns"> 47 <tag>text</tag> 48 <tag /> 49 <section> 50 <tag>subtext</tag> 51 </section> 52</body> 53""" 54 55 56def sanity(): 57 """ 58 Import sanity. 
def check_method(method):
    """Print ``<method> not callable`` when *method* has no ``__call__``.

    Prints nothing for callables, so a doctest that calls this helper only
    produces output (and therefore fails) when the attribute is missing.
    """
    if not hasattr(method, '__call__'):
        # Single-argument print(...) emits the same text under the Python 2
        # print statement and the Python 3 print function.
        print("%s not callable" % (method,))


def serialize(elem, to_string=True, **options):
    """Serialize *elem* with ``ET.ElementTree(elem).write``.

    elem -- root element wrapped in an ``ET.ElementTree``.
    to_string -- when true (the default) return the buffer contents;
        otherwise return the in-memory file object rewound to offset 0.
    options -- keyword arguments forwarded unchanged to ``write``.
    """
    try:
        from StringIO import StringIO as buffer_factory  # Python 2
    except ImportError:
        # No StringIO module available: use a bytes buffer instead, in
        # which case getvalue() returns bytes.
        from io import BytesIO as buffer_factory
    stream = buffer_factory()
    ET.ElementTree(elem).write(stream, **options)
    if to_string:
        return stream.getvalue()
    stream.seek(0)
    return stream


def summarize(elem):
    """Return ``"<Comment>"`` for comment nodes, otherwise ``elem.tag``."""
    if elem.tag == ET.Comment:
        return "<Comment>"
    return elem.tag


def summarize_list(seq):
    """Return ``summarize(elem)`` for every element of *seq*, as a list."""
    return [summarize(elem) for elem in seq]


def normalize_crlf(tree):
    """Replace ``"\\r\\n"`` with ``"\\n"`` in every text and tail of *tree*.

    Modifies the elements yielded by ``tree.iter()`` in place.
    """
    for elem in tree.iter():
        if elem.text:
            elem.text = elem.text.replace("\r\n", "\n")
        if elem.tail:
            elem.tail = elem.tail.replace("\r\n", "\n")


def check_string(string):
    """Exercise the basic string protocol on *string*.

    Calls len(), iterates, concatenates and slices; an object that lacks
    one of these operations raises.  Prints a message for every iterated
    item whose length is not 1.
    """
    len(string)
    for char in string:
        if len(char) != 1:
            print("expected one-character string, got %r" % (char,))
    # The results are discarded on purpose: only the fact that the
    # operations are supported is being checked.
    string + ""
    string + " "
    string[:0]


def check_mapping(mapping):
    """Exercise the basic mapping protocol on *mapping*.

    Side effect: stores ``"value"`` under ``"key"`` in *mapping*, then
    prints a message if reading it back yields something else.
    """
    len(mapping)
    keys = mapping.keys()
    mapping.items()
    for key in keys:
        mapping[key]
    # NOTE(review): placed after the loop (not inside it) when the collapsed
    # source was re-indented -- confirm against the upstream file.
    mapping["key"] = "value"
    if mapping["key"] != "value":
        print("expected value string, got %r" % (mapping["key"],))


def check_element(element):
    """Check that *element* and all of its children look like elements.

    Prints one line per missing member, then runs check_string() on the
    tag, text and tail and check_mapping() on the attributes (which adds a
    ``"key"`` attribute to every visited element), recursing into children.
    """
    if not ET.iselement(element):
        print("not an element")
    for member in ("tag", "attrib", "text", "tail"):
        if not hasattr(element, member):
            print("no %s member" % member)

    check_string(element.tag)
    check_mapping(element.attrib)
    if element.text is not None:
        check_string(element.text)
    if element.tail is not None:
        check_string(element.tail)
    for elem in element:
        check_element(elem)
# --------------------------------------------------------------------
# element tree tests

# Doctest-only function: the docstring is the test.  It relies on the
# module-level ET symbol and on the check_element/check_method helpers.
def interface():
    r"""
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
192 193 >>> elem = ET.XML("<body><tag/></body>") 194 >>> serialize(elem) 195 '<body><tag /></body>' 196 >>> e = ET.Element("tag2") 197 >>> elem.append(e) 198 >>> serialize(elem) 199 '<body><tag /><tag2 /></body>' 200 >>> elem.remove(e) 201 >>> serialize(elem) 202 '<body><tag /></body>' 203 >>> elem.insert(0, e) 204 >>> serialize(elem) 205 '<body><tag2 /><tag /></body>' 206 >>> elem.remove(e) 207 >>> elem.extend([e]) 208 >>> serialize(elem) 209 '<body><tag /><tag2 /></body>' 210 >>> elem.remove(e) 211 212 >>> element = ET.Element("tag", key="value") 213 >>> serialize(element) # 1 214 '<tag key="value" />' 215 >>> subelement = ET.Element("subtag") 216 >>> element.append(subelement) 217 >>> serialize(element) # 2 218 '<tag key="value"><subtag /></tag>' 219 >>> element.insert(0, subelement) 220 >>> serialize(element) # 3 221 '<tag key="value"><subtag /><subtag /></tag>' 222 >>> element.remove(subelement) 223 >>> serialize(element) # 4 224 '<tag key="value"><subtag /></tag>' 225 >>> element.remove(subelement) 226 >>> serialize(element) # 5 227 '<tag key="value" />' 228 >>> element.remove(subelement) 229 Traceback (most recent call last): 230 ValueError: list.remove(x): x not in list 231 >>> serialize(element) # 6 232 '<tag key="value" />' 233 >>> element[0:0] = [subelement, subelement, subelement] 234 >>> serialize(element[1]) 235 '<subtag />' 236 >>> element[1:9] == [element[1], element[2]] 237 True 238 >>> element[:9:2] == [element[0], element[2]] 239 True 240 >>> del element[1:2] 241 >>> serialize(element) 242 '<tag key="value"><subtag /><subtag /></tag>' 243 """ 244 245def cdata(): 246 """ 247 Test CDATA handling (etc). 
# Only with Python implementation
# (the doctest re-imports xml.etree.ElementTree itself and temporarily swaps
# ElementTree.ElementPath for the _SimpleElementPath fallback, restoring it
# in the last example).
def simplefind():
    """
    Test find methods using the elementpath fallback.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
296 297 >>> elem = ET.XML(SAMPLE_XML) 298 >>> elem.find("tag").tag 299 'tag' 300 >>> ET.ElementTree(elem).find("tag").tag 301 'tag' 302 >>> elem.find("section/tag").tag 303 'tag' 304 >>> elem.find("./tag").tag 305 'tag' 306 >>> ET.ElementTree(elem).find("./tag").tag 307 'tag' 308 >>> ET.ElementTree(elem).find("/tag").tag 309 'tag' 310 >>> elem[2] = ET.XML(SAMPLE_SECTION) 311 >>> elem.find("section/nexttag").tag 312 'nexttag' 313 >>> ET.ElementTree(elem).find("section/tag").tag 314 'tag' 315 >>> ET.ElementTree(elem).find("tog") 316 >>> ET.ElementTree(elem).find("tog/foo") 317 >>> elem.findtext("tag") 318 'text' 319 >>> elem.findtext("section/nexttag") 320 '' 321 >>> elem.findtext("section/nexttag", "default") 322 '' 323 >>> elem.findtext("tog") 324 >>> elem.findtext("tog", "default") 325 'default' 326 >>> ET.ElementTree(elem).findtext("tag") 327 'text' 328 >>> ET.ElementTree(elem).findtext("tog/foo") 329 >>> ET.ElementTree(elem).findtext("tog/foo", "default") 330 'default' 331 >>> ET.ElementTree(elem).findtext("./tag") 332 'text' 333 >>> ET.ElementTree(elem).findtext("/tag") 334 'text' 335 >>> elem.findtext("section/tag") 336 'subtext' 337 >>> ET.ElementTree(elem).findtext("section/tag") 338 'subtext' 339 >>> summarize_list(elem.findall(".")) 340 ['body'] 341 >>> summarize_list(elem.findall("tag")) 342 ['tag', 'tag'] 343 >>> summarize_list(elem.findall("tog")) 344 [] 345 >>> summarize_list(elem.findall("tog/foo")) 346 [] 347 >>> summarize_list(elem.findall("*")) 348 ['tag', 'tag', 'section'] 349 >>> summarize_list(elem.findall(".//tag")) 350 ['tag', 'tag', 'tag', 'tag'] 351 >>> summarize_list(elem.findall("section/tag")) 352 ['tag'] 353 >>> summarize_list(elem.findall("section//tag")) 354 ['tag', 'tag'] 355 >>> summarize_list(elem.findall("section/*")) 356 ['tag', 'nexttag', 'nextsection'] 357 >>> summarize_list(elem.findall("section//*")) 358 ['tag', 'nexttag', 'nextsection', 'tag'] 359 >>> summarize_list(elem.findall("section/.//*")) 360 ['tag', 'nexttag', 
'nextsection', 'tag'] 361 >>> summarize_list(elem.findall("*/*")) 362 ['tag', 'nexttag', 'nextsection'] 363 >>> summarize_list(elem.findall("*//*")) 364 ['tag', 'nexttag', 'nextsection', 'tag'] 365 >>> summarize_list(elem.findall("*/tag")) 366 ['tag'] 367 >>> summarize_list(elem.findall("*/./tag")) 368 ['tag'] 369 >>> summarize_list(elem.findall("./tag")) 370 ['tag', 'tag'] 371 >>> summarize_list(elem.findall(".//tag")) 372 ['tag', 'tag', 'tag', 'tag'] 373 >>> summarize_list(elem.findall("././tag")) 374 ['tag', 'tag'] 375 >>> summarize_list(elem.findall(".//tag[@class]")) 376 ['tag', 'tag', 'tag'] 377 >>> summarize_list(elem.findall(".//tag[@class='a']")) 378 ['tag'] 379 >>> summarize_list(elem.findall(".//tag[@class='b']")) 380 ['tag', 'tag'] 381 >>> summarize_list(elem.findall(".//tag[@id]")) 382 ['tag'] 383 >>> summarize_list(elem.findall(".//section[tag]")) 384 ['section'] 385 >>> summarize_list(elem.findall(".//section[element]")) 386 [] 387 >>> summarize_list(elem.findall("../tag")) 388 [] 389 >>> summarize_list(elem.findall("section/../tag")) 390 ['tag', 'tag'] 391 >>> summarize_list(ET.ElementTree(elem).findall("./tag")) 392 ['tag', 'tag'] 393 394 Following example is invalid in 1.2. 395 A leading '*' is assumed in 1.3. 396 397 >>> elem.findall("section//") == elem.findall("section//*") 398 True 399 400 ET's Path module handles this case incorrectly; this gives 401 a warning in 1.3, and the behaviour will be modified in 1.4. 
# Doctest-only function: builds ElementTree objects from a file object and
# from a file name and checks find() on both.
def file_init():
    """
    >>> import StringIO

    >>> stringfile = StringIO.StringIO(SAMPLE_XML)
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """


def bad_find():
    """
    Check bad or unsupported path expressions.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """


def path_cache():
    """
    Check that the path cache behaves sanely.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
464 465 >>> import copy 466 >>> e1 = ET.XML("<tag>hello<foo/></tag>") 467 >>> e2 = copy.copy(e1) 468 >>> e3 = copy.deepcopy(e1) 469 >>> e1.find("foo").tag = "bar" 470 >>> serialize(e1) 471 '<tag>hello<bar /></tag>' 472 >>> serialize(e2) 473 '<tag>hello<bar /></tag>' 474 >>> serialize(e3) 475 '<tag>hello<foo /></tag>' 476 477 """ 478 479def attrib(): 480 """ 481 Test attribute handling. 482 483 >>> elem = ET.Element("tag") 484 >>> elem.get("key") # 1.1 485 >>> elem.get("key", "default") # 1.2 486 'default' 487 >>> elem.set("key", "value") 488 >>> elem.get("key") # 1.3 489 'value' 490 491 >>> elem = ET.Element("tag", key="value") 492 >>> elem.get("key") # 2.1 493 'value' 494 >>> elem.attrib # 2.2 495 {'key': 'value'} 496 497 >>> attrib = {"key": "value"} 498 >>> elem = ET.Element("tag", attrib) 499 >>> attrib.clear() # check for aliasing issues 500 >>> elem.get("key") # 3.1 501 'value' 502 >>> elem.attrib # 3.2 503 {'key': 'value'} 504 505 >>> attrib = {"key": "value"} 506 >>> elem = ET.Element("tag", **attrib) 507 >>> attrib.clear() # check for aliasing issues 508 >>> elem.get("key") # 4.1 509 'value' 510 >>> elem.attrib # 4.2 511 {'key': 'value'} 512 513 >>> elem = ET.Element("tag", {"key": "other"}, key="value") 514 >>> elem.get("key") # 5.1 515 'value' 516 >>> elem.attrib # 5.2 517 {'key': 'value'} 518 519 >>> elem = ET.Element('test') 520 >>> elem.text = "aa" 521 >>> elem.set('testa', 'testval') 522 >>> elem.set('testb', 'test2') 523 >>> ET.tostring(elem) 524 '<test testa="testval" testb="test2">aa</test>' 525 >>> sorted(elem.keys()) 526 ['testa', 'testb'] 527 >>> sorted(elem.items()) 528 [('testa', 'testval'), ('testb', 'test2')] 529 >>> elem.attrib['testb'] 530 'test2' 531 >>> elem.attrib['testb'] = 'test1' 532 >>> elem.attrib['testc'] = 'test2' 533 >>> ET.tostring(elem) 534 '<test testa="testval" testb="test1" testc="test2">aa</test>' 535 """ 536 537def makeelement(): 538 """ 539 Test makeelement handling. 
540 541 >>> elem = ET.Element("tag") 542 >>> attrib = {"key": "value"} 543 >>> subelem = elem.makeelement("subtag", attrib) 544 >>> if subelem.attrib is attrib: 545 ... print "attrib aliasing" 546 >>> elem.append(subelem) 547 >>> serialize(elem) 548 '<tag><subtag key="value" /></tag>' 549 550 >>> elem.clear() 551 >>> serialize(elem) 552 '<tag />' 553 >>> elem.append(subelem) 554 >>> serialize(elem) 555 '<tag><subtag key="value" /></tag>' 556 >>> elem.extend([subelem, subelem]) 557 >>> serialize(elem) 558 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>' 559 >>> elem[:] = [subelem] 560 >>> serialize(elem) 561 '<tag><subtag key="value" /></tag>' 562 >>> elem[:] = tuple([subelem]) 563 >>> serialize(elem) 564 '<tag><subtag key="value" /></tag>' 565 566 """ 567 568def parsefile(): 569 """ 570 Test parsing from file. 571 572 >>> tree = ET.parse(SIMPLE_XMLFILE) 573 >>> normalize_crlf(tree) 574 >>> tree.write(sys.stdout) 575 <root> 576 <element key="value">text</element> 577 <element>text</element>tail 578 <empty-element /> 579 </root> 580 >>> tree = ET.parse(SIMPLE_NS_XMLFILE) 581 >>> normalize_crlf(tree) 582 >>> tree.write(sys.stdout) 583 <ns0:root xmlns:ns0="namespace"> 584 <ns0:element key="value">text</ns0:element> 585 <ns0:element>text</ns0:element>tail 586 <ns0:empty-element /> 587 </ns0:root> 588 589 >>> with open(SIMPLE_XMLFILE) as f: 590 ... data = f.read() 591 592 >>> parser = ET.XMLParser() 593 >>> parser.version # doctest: +ELLIPSIS 594 'Expat ...' 
# Doctest-only function: parses XML given as string literals (XML,
# fromstring, fromstringlist, XMLID) and serializes the result again.
def parseliteral():
    """
    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> print ET.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ET.tostringlist(element))
    <html><body>text</body></html>
    >>> ET.tostring(element, "ascii")
    "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
print action, elem.tag 661 end element 662 end empty-element 663 end root 664 >>> context.root.tag 665 'root' 666 667 >>> context = iterparse(SIMPLE_NS_XMLFILE) 668 >>> for action, elem in context: 669 ... print action, elem.tag 670 end {namespace}element 671 end {namespace}element 672 end {namespace}empty-element 673 end {namespace}root 674 675 >>> events = () 676 >>> context = iterparse(SIMPLE_XMLFILE, events) 677 >>> for action, elem in context: 678 ... print action, elem.tag 679 680 >>> events = () 681 >>> context = iterparse(SIMPLE_XMLFILE, events=events) 682 >>> for action, elem in context: 683 ... print action, elem.tag 684 685 >>> events = ("start", "end") 686 >>> context = iterparse(SIMPLE_XMLFILE, events) 687 >>> for action, elem in context: 688 ... print action, elem.tag 689 start root 690 start element 691 end element 692 start element 693 end element 694 start empty-element 695 end empty-element 696 end root 697 698 >>> events = ("start", "end", "start-ns", "end-ns") 699 >>> context = iterparse(SIMPLE_NS_XMLFILE, events) 700 >>> for action, elem in context: 701 ... if action in ("start", "end"): 702 ... print action, elem.tag 703 ... else: 704 ... print action, elem 705 start-ns ('', 'namespace') 706 start {namespace}root 707 start {namespace}element 708 end {namespace}element 709 start {namespace}element 710 end {namespace}element 711 start {namespace}empty-element 712 end {namespace}empty-element 713 end {namespace}root 714 end-ns None 715 716 >>> events = ("start", "end", "bogus") 717 >>> with open(SIMPLE_XMLFILE, "rb") as f: 718 ... iterparse(f, events) 719 Traceback (most recent call last): 720 ValueError: unknown event 'bogus' 721 722 >>> import StringIO 723 724 >>> source = StringIO.StringIO( 725 ... "<?xml version='1.0' encoding='iso-8859-1'?>\\n" 726 ... "<body xmlns='http://éffbot.org/ns'\\n" 727 ... 
" xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n") 728 >>> events = ("start-ns",) 729 >>> context = iterparse(source, events) 730 >>> for action, elem in context: 731 ... print action, elem 732 start-ns ('', u'http://\\xe9ffbot.org/ns') 733 start-ns (u'cl\\xe9', 'http://effbot.org/ns') 734 735 >>> source = StringIO.StringIO("<document />junk") 736 >>> try: 737 ... for action, elem in iterparse(source): 738 ... print action, elem.tag 739 ... except ET.ParseError, v: 740 ... print v 741 junk after document element: line 1, column 12 742 """ 743 744def writefile(): 745 """ 746 >>> elem = ET.Element("tag") 747 >>> elem.text = "text" 748 >>> serialize(elem) 749 '<tag>text</tag>' 750 >>> ET.SubElement(elem, "subtag").text = "subtext" 751 >>> serialize(elem) 752 '<tag>text<subtag>subtext</subtag></tag>' 753 754 Test tag suppression 755 >>> elem.tag = None 756 >>> serialize(elem) 757 'text<subtag>subtext</subtag>' 758 >>> elem.insert(0, ET.Comment("comment")) 759 >>> serialize(elem) # assumes 1.3 760 'text<!--comment--><subtag>subtext</subtag>' 761 >>> elem[0] = ET.PI("key", "value") 762 >>> serialize(elem) 763 'text<?key value?><subtag>subtext</subtag>' 764 """ 765 766def custom_builder(): 767 """ 768 Test parser w. custom builder. 769 770 >>> with open(SIMPLE_XMLFILE) as f: 771 ... data = f.read() 772 >>> class Builder: 773 ... def start(self, tag, attrib): 774 ... print "start", tag 775 ... def end(self, tag): 776 ... print "end", tag 777 ... def data(self, text): 778 ... pass 779 >>> builder = Builder() 780 >>> parser = ET.XMLParser(target=builder) 781 >>> parser.feed(data) 782 start root 783 start element 784 end element 785 start element 786 end element 787 start empty-element 788 end empty-element 789 end root 790 791 >>> with open(SIMPLE_NS_XMLFILE) as f: 792 ... data = f.read() 793 >>> class Builder: 794 ... def start(self, tag, attrib): 795 ... print "start", tag 796 ... def end(self, tag): 797 ... print "end", tag 798 ... def data(self, text): 799 ... 
# Doctest-only function: the docstring is the test.
def getchildren():
    """
    Test Element.getchildren()

    >>> with open(SIMPLE_XMLFILE, "r") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """


# Doctest-only function: the docstring is the test.
def writestring():
    """
    >>> elem = ET.XML("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    >>> elem = ET.fromstring("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    """


def check_encoding(encoding):
    """
    Parse a minimal document whose XML declaration names *encoding*.

    Returns None; any failure surfaces as the exception raised by ET.XML.

    >>> check_encoding("ascii")
    >>> check_encoding("us-ascii")
    >>> check_encoding("iso-8859-1")
    >>> check_encoding("iso-8859-15")
    >>> check_encoding("cp437")
    >>> check_encoding("mac-roman")
    """
    ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
# Doctest-only function.  The "<" inside <script> must be written as the
# entity &lt; in the XML input (a bare "<" in element content is not
# well-formed) and is escaped again by the xml serializer; the html and
# text methods emit it unescaped.
def methods():
    r"""
    Test serialization methods.

    >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    >>> e.tail = "\n"
    >>> serialize(e)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method=None)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="xml")
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="html")
    '<html><link><script>1 < 2</script></html>\n'
    >>> serialize(e, method="text")
    '1 < 2\n'
    """


# Doctest-only function: the docstring is the test.
def iterators():
    """
    Test iterators.

    >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    >>> summarize_list(e.iter())
    ['html', 'body', 'i']
    >>> summarize_list(e.find("body").iter())
    ['body', 'i']
    >>> summarize(next(e.iter()))
    'html'
    >>> "".join(e.itertext())
    'this is a paragraph...'
    >>> "".join(e.find("body").itertext())
    'this is a paragraph.'
    >>> next(e.itertext())
    'this is a '

    Method iterparse should return an iterator. See bug 6472.

    >>> sourcefile = serialize(e, to_string=False)
    >>> next(ET.iterparse(sourcefile))  # doctest: +ELLIPSIS
    ('end', <Element 'i' at 0x...>)

    >>> tree = ET.ElementTree(None)
    >>> tree.iter()
    Traceback (most recent call last):
    AttributeError: 'NoneType' object has no attribute 'iter'
    """


# Document that references an entity declared only through an external
# parameter entity, so "&entity;" is undefined unless the parser is told
# about it.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
def error(xml):
    """

    Test error handling.

    Parse *xml* and return the ET.ParseError it raises; returns None when
    the document parses without error.

    >>> issubclass(ET.ParseError, SyntaxError)
    True
    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError as exc:
        # Bind the exception directly instead of reading the deprecated,
        # Python-2-only global sys.exc_value.
        return exc
1056 1057 1) xml namespace 1058 1059 >>> elem = ET.XML("<tag xml:lang='en' />") 1060 >>> serialize(elem) # 1.1 1061 '<tag xml:lang="en" />' 1062 1063 2) other "well-known" namespaces 1064 1065 >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 1066 >>> serialize(elem) # 2.1 1067 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />' 1068 1069 >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 1070 >>> serialize(elem) # 2.2 1071 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />' 1072 1073 >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 1074 >>> serialize(elem) # 2.3 1075 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />' 1076 1077 3) unknown namespaces 1078 >>> elem = ET.XML(SAMPLE_XML_NS) 1079 >>> print serialize(elem) 1080 <ns0:body xmlns:ns0="http://effbot.org/ns"> 1081 <ns0:tag>text</ns0:tag> 1082 <ns0:tag /> 1083 <ns0:section> 1084 <ns0:tag>subtext</ns0:tag> 1085 </ns0:section> 1086 </ns0:body> 1087 """ 1088 1089def qname(): 1090 """ 1091 Test QName handling. 
1092 1093 1) decorated tags 1094 1095 >>> elem = ET.Element("{uri}tag") 1096 >>> serialize(elem) # 1.1 1097 '<ns0:tag xmlns:ns0="uri" />' 1098 >>> elem = ET.Element(ET.QName("{uri}tag")) 1099 >>> serialize(elem) # 1.2 1100 '<ns0:tag xmlns:ns0="uri" />' 1101 >>> elem = ET.Element(ET.QName("uri", "tag")) 1102 >>> serialize(elem) # 1.3 1103 '<ns0:tag xmlns:ns0="uri" />' 1104 >>> elem = ET.Element(ET.QName("uri", "tag")) 1105 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 1106 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 1107 >>> serialize(elem) # 1.4 1108 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>' 1109 1110 2) decorated attributes 1111 1112 >>> elem.clear() 1113 >>> elem.attrib["{uri}key"] = "value" 1114 >>> serialize(elem) # 2.1 1115 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />' 1116 1117 >>> elem.clear() 1118 >>> elem.attrib[ET.QName("{uri}key")] = "value" 1119 >>> serialize(elem) # 2.2 1120 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />' 1121 1122 3) decorated values are not converted by default, but the 1123 QName wrapper can be used for values 1124 1125 >>> elem.clear() 1126 >>> elem.attrib["{uri}key"] = "{uri}value" 1127 >>> serialize(elem) # 3.1 1128 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />' 1129 1130 >>> elem.clear() 1131 >>> elem.attrib["{uri}key"] = ET.QName("{uri}value") 1132 >>> serialize(elem) # 3.2 1133 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />' 1134 1135 >>> elem.clear() 1136 >>> subelem = ET.Element("tag") 1137 >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 1138 >>> elem.append(subelem) 1139 >>> elem.append(subelem) 1140 >>> serialize(elem) # 3.3 1141 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>' 1142 1143 4) Direct QName tests 1144 1145 >>> str(ET.QName('ns', 'tag')) 1146 '{ns}tag' 1147 >>> str(ET.QName('{ns}tag')) 1148 '{ns}tag' 1149 >>> q1 = ET.QName('ns', 'tag') 1150 >>> q2 = ET.QName('ns', 'tag') 
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    from xml.etree import ElementPath
    # The tokenizer yields (op, tag) pairs; flatten each pair to the operator
    # when it is non-empty, otherwise to the tag (which may itself be empty).
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
def xinclude_loader(href, parse="xml", encoding=None):
    """Loader for the XInclude tests, backed by the in-memory XINCLUDE table.

    Looks *href* up in XINCLUDE and raises IOError("resource not found")
    when there is no such entry.  With parse == "xml" the stored text is
    parsed and the resulting element is returned; for any other parse value
    the stored text is returned unchanged.  *encoding* is accepted but not
    used.
    """
    try:
        payload = XINCLUDE[href]
    except KeyError:
        raise IOError("resource not found")
    if parse != "xml":
        return payload
    # Imported here rather than taken from the module-level ET symbol.
    from xml.etree.ElementTree import XML
    return XML(payload)
serialize(document) # C2 1354 <document> 1355 <p>This document has been accessed 1356 324387 times.</p> 1357 </document> 1358 1359 Textual inclusion after sibling element (based on modified XInclude C.2) 1360 1361 >>> document = xinclude_loader("C2b.xml") 1362 >>> ElementInclude.include(document, xinclude_loader) 1363 >>> print(serialize(document)) # C2b 1364 <document> 1365 <p>This document has been <em>accessed</em> 1366 324387 times.</p> 1367 </document> 1368 1369 Textual inclusion of XML example (XInclude C.3) 1370 1371 >>> document = xinclude_loader("C3.xml") 1372 >>> ElementInclude.include(document, xinclude_loader) 1373 >>> print serialize(document) # C3 1374 <document> 1375 <p>The following is the source of the "data.xml" resource:</p> 1376 <example><?xml version='1.0'?> 1377 <data> 1378 <item><![CDATA[Brooks & Shields]]></item> 1379 </data> 1380 </example> 1381 </document> 1382 1383 Fallback example (XInclude C.5) 1384 Note! Fallback support is not yet implemented 1385 1386 >>> document = xinclude_loader("C5.xml") 1387 >>> ElementInclude.include(document, xinclude_loader) 1388 Traceback (most recent call last): 1389 IOError: resource not found 1390 >>> # print serialize(document) # C5 1391 """ 1392 1393def xinclude_default(): 1394 """ 1395 >>> from xml.etree import ElementInclude 1396 1397 >>> document = xinclude_loader("default.xml") 1398 >>> ElementInclude.include(document) 1399 >>> print serialize(document) # default 1400 <document> 1401 <p>Example.</p> 1402 <root> 1403 <element key="value">text</element> 1404 <element>text</element>tail 1405 <empty-element /> 1406 </root> 1407 </document> 1408 """ 1409 1410# 1411# badly formatted xi:include tags 1412 1413XINCLUDE_BAD = {} 1414 1415XINCLUDE_BAD["B1.xml"] = """\ 1416<?xml version='1.0'?> 1417<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1418 <p>120 Mz is adequate for an average home user.</p> 1419 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/> 1420</document> 1421""" 1422 
1423XINCLUDE_BAD["B2.xml"] = """\ 1424<?xml version='1.0'?> 1425<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1426 <xi:fallback></xi:fallback> 1427</div> 1428""" 1429 1430def xinclude_failures(): 1431 r""" 1432 Test failure to locate included XML file. 1433 1434 >>> from xml.etree import ElementInclude 1435 1436 >>> def none_loader(href, parser, encoding=None): 1437 ... return None 1438 1439 >>> document = ET.XML(XINCLUDE["C1.xml"]) 1440 >>> ElementInclude.include(document, loader=none_loader) 1441 Traceback (most recent call last): 1442 FatalIncludeError: cannot load 'disclaimer.xml' as 'xml' 1443 1444 Test failure to locate included text file. 1445 1446 >>> document = ET.XML(XINCLUDE["C2.xml"]) 1447 >>> ElementInclude.include(document, loader=none_loader) 1448 Traceback (most recent call last): 1449 FatalIncludeError: cannot load 'count.txt' as 'text' 1450 1451 Test bad parse type. 1452 1453 >>> document = ET.XML(XINCLUDE_BAD["B1.xml"]) 1454 >>> ElementInclude.include(document, loader=none_loader) 1455 Traceback (most recent call last): 1456 FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE') 1457 1458 Test xi:fallback outside xi:include. 
1459 1460 >>> document = ET.XML(XINCLUDE_BAD["B2.xml"]) 1461 >>> ElementInclude.include(document, loader=none_loader) 1462 Traceback (most recent call last): 1463 FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback') 1464 """ 1465 1466# -------------------------------------------------------------------- 1467# reported bugs 1468 1469def bug_xmltoolkit21(): 1470 """ 1471 1472 marshaller gives obscure errors for non-string values 1473 1474 >>> elem = ET.Element(123) 1475 >>> serialize(elem) # tag 1476 Traceback (most recent call last): 1477 TypeError: cannot serialize 123 (type int) 1478 >>> elem = ET.Element("elem") 1479 >>> elem.text = 123 1480 >>> serialize(elem) # text 1481 Traceback (most recent call last): 1482 TypeError: cannot serialize 123 (type int) 1483 >>> elem = ET.Element("elem") 1484 >>> elem.tail = 123 1485 >>> serialize(elem) # tail 1486 Traceback (most recent call last): 1487 TypeError: cannot serialize 123 (type int) 1488 >>> elem = ET.Element("elem") 1489 >>> elem.set(123, "123") 1490 >>> serialize(elem) # attribute key 1491 Traceback (most recent call last): 1492 TypeError: cannot serialize 123 (type int) 1493 >>> elem = ET.Element("elem") 1494 >>> elem.set("123", 123) 1495 >>> serialize(elem) # attribute value 1496 Traceback (most recent call last): 1497 TypeError: cannot serialize 123 (type int) 1498 1499 """ 1500 1501def bug_xmltoolkit25(): 1502 """ 1503 1504 typo in ElementTree.findtext 1505 1506 >>> elem = ET.XML(SAMPLE_XML) 1507 >>> tree = ET.ElementTree(elem) 1508 >>> tree.findtext("tag") 1509 'text' 1510 >>> tree.findtext("section/tag") 1511 'subtext' 1512 1513 """ 1514 1515def bug_xmltoolkit28(): 1516 """ 1517 1518 .//tag causes exceptions 1519 1520 >>> tree = ET.XML("<doc><table><tbody/></table></doc>") 1521 >>> summarize_list(tree.findall(".//thead")) 1522 [] 1523 >>> summarize_list(tree.findall(".//tbody")) 1524 ['tbody'] 1525 1526 """ 1527 1528def bug_xmltoolkitX1(): 1529 """ 
class ExceptionFile:
    """File-like stub whose read() always raises IOError."""

    def read(self, x):
        # The requested size is ignored: every read attempt fails.
        raise IOError()
def xmltoolkit63():
    """

    Check reference leak.
    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    # Drive a TreeBuilder through one complete <tag>text</tag> element;
    # nothing is returned, the doctest above only watches None's refcount.
    builder = ET.TreeBuilder()
    builder.start("tag", {})
    builder.data("text")
    builder.end("tag")
def bug_200709_default_namespace():
    """
    Serialization with the default_namespace option.

    Case 1: every tag lives in the default namespace, so the tags are
    written without a prefix.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    Case 2: a tag from another namespace still gets a generated prefix.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    Case 3: an unqualified tag cannot be combined with default_namespace
    and is rejected with ValueError.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """
def bug_200709_element_insert():
    """
    Element.insert() with index 0 and with a negative index.

    Inserting at 0 puts the new child first; inserting at -1 puts it
    just before the last child.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']

    """
def check_issue6565():
    """
    Full-slice assignment replaces all children of an element.

    After elem[:] = newelem[:] the single original child is gone and
    elem holds the three children taken from SAMPLE_XML.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> summarize_list(elem)
    ['tag']
    >>> newelem = ET.XML(SAMPLE_XML)
    >>> elem[:] = newelem[:]
    >>> summarize_list(elem)
    ['tag', 'tag', 'section']

    """
def test_main(module_name='xml.etree.ElementTree'):
    """Run the doctests of test_xml_etree inside a CleanContext.

    *module_name* is the name the module-level ET symbol of test_xml_etree
    is expected to carry, both before and after the run.  Warning checks
    are made quiet for every module name other than the default one.
    """
    from test import test_xml_etree

    # The same doctests are used for both the Python and the C implementations
    assert test_xml_etree.ET.__name__ == module_name

    # XXX the C module should give the same warnings as the Python module
    warnings_optional = module_name != 'xml.etree.ElementTree'
    with CleanContext(quiet=warnings_optional):
        test_support.run_doctest(test_xml_etree, verbosity=True)

    # The module should not be changed by the tests
    assert test_xml_etree.ET.__name__ == module_name