1"""Lightweight XML support for Python. 2 3 XML is an inherently hierarchical data format, and the most natural way to 4 represent it is with a tree. This module has two classes for this purpose: 5 6 1. ElementTree represents the whole XML document as a tree and 7 8 2. Element represents a single node in this tree. 9 10 Interactions with the whole document (reading and writing to/from files) are 11 usually done on the ElementTree level. Interactions with a single XML element 12 and its sub-elements are done on the Element level. 13 14 Element is a flexible container object designed to store hierarchical data 15 structures in memory. It can be described as a cross between a list and a 16 dictionary. Each Element has a number of properties associated with it: 17 18 'tag' - a string containing the element's name. 19 20 'attributes' - a Python dictionary storing the element's attributes. 21 22 'text' - a string containing the element's text content. 23 24 'tail' - an optional string containing text after the element's end tag. 25 26 And a number of child elements stored in a Python sequence. 27 28 To create an element instance, use the Element constructor, 29 or the SubElement factory function. 30 31 You can also use the ElementTree class to wrap an element structure 32 and convert it to and from XML. 33 34""" 35 36#--------------------------------------------------------------------- 37# Licensed to PSF under a Contributor Agreement. 38# See http://www.python.org/psf/license for licensing details. 39# 40# ElementTree 41# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 42# 43# fredrik@pythonware.com 44# http://www.pythonware.com 45# -------------------------------------------------------------------- 46# The ElementTree toolkit is 47# 48# Copyright (c) 1999-2008 by Fredrik Lundh 49# 50# By obtaining, using, and/or copying this software and/or its 51# associated documentation, you agree that you have read, understood, 52# and will comply with the following terms and conditions: 53# 54# Permission to use, copy, modify, and distribute this software and 55# its associated documentation for any purpose and without fee is 56# hereby granted, provided that the above copyright notice appears in 57# all copies, and that both that copyright notice and this permission 58# notice appear in supporting documentation, and that the name of 59# Secret Labs AB or the author not be used in advertising or publicity 60# pertaining to distribution of the software without specific, written 61# prior permission. 62# 63# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 64# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 65# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 66# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 67# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 68# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 69# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 70# OF THIS SOFTWARE. 71# -------------------------------------------------------------------- 72 73__all__ = [ 74 # public symbols 75 "Comment", 76 "dump", 77 "Element", "ElementTree", 78 "fromstring", "fromstringlist", 79 "iselement", "iterparse", 80 "parse", "ParseError", 81 "PI", "ProcessingInstruction", 82 "QName", 83 "SubElement", 84 "tostring", "tostringlist", 85 "TreeBuilder", 86 "VERSION", 87 "XML", "XMLID", 88 "XMLParser", "XMLPullParser", 89 "register_namespace", 90 ] 91 92VERSION = "1.3.0" 93 94import sys 95import re 96import warnings 97import io 98import collections 99import collections.abc 100import contextlib 101 102from . import ElementPath 103 104 105class ParseError(SyntaxError): 106 """An error when parsing an XML document. 107 108 In addition to its exception value, a ParseError contains 109 two extra attributes: 110 'code' - the specific exception code 111 'position' - the line and column of the error 112 113 """ 114 pass 115 116# -------------------------------------------------------------------- 117 118 119def iselement(element): 120 """Return True if *element* appears to be an Element.""" 121 return hasattr(element, 'tag') 122 123 124class Element: 125 """An XML element. 126 127 This class is the reference implementation of the Element interface. 128 129 An element's length is its number of subelements. That means if you 130 want to check if an element is truly empty, you should check BOTH 131 its length AND its text attribute. 132 133 The element tag, attribute names, and attribute values can be either 134 bytes or strings. 135 136 *tag* is the element name. *attrib* is an optional dictionary containing 137 element attributes. *extra* are additional element attributes given as 138 keyword arguments. 139 140 Example form: 141 <tag attrib>text<child/>...</tag>tail 142 143 """ 144 145 tag = None 146 """The element's name.""" 147 148 attrib = None 149 """Dictionary of the element's attributes.""" 150 151 text = None 152 """ 153 Text before first subelement. This is either a string or the value None. 154 Note that if there is no text, this attribute may be either 155 None or the empty string, depending on the parser. 156 157 """ 158 159 tail = None 160 """ 161 Text after this element's end tag, but before the next sibling element's 162 start tag. This is either a string or the value None. Note that if there 163 was no text, this attribute may be either None or an empty string, 164 depending on the parser. 165 166 """ 167 168 def __init__(self, tag, attrib={}, **extra): 169 if not isinstance(attrib, dict): 170 raise TypeError("attrib must be dict, not %s" % ( 171 attrib.__class__.__name__,)) 172 attrib = attrib.copy() 173 attrib.update(extra) 174 self.tag = tag 175 self.attrib = attrib 176 self._children = [] 177 178 def __repr__(self): 179 return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self)) 180 181 def makeelement(self, tag, attrib): 182 """Create a new element with the same type. 183 184 *tag* is a string containing the element name. 185 *attrib* is a dictionary containing the element attributes. 186 187 Do not call this method, use the SubElement factory function instead. 188 189 """ 190 return self.__class__(tag, attrib) 191 192 def copy(self): 193 """Return copy of current element. 194 195 This creates a shallow copy. Subelements will be shared with the 196 original tree. 197 198 """ 199 elem = self.makeelement(self.tag, self.attrib) 200 elem.text = self.text 201 elem.tail = self.tail 202 elem[:] = self 203 return elem 204 205 def __len__(self): 206 return len(self._children) 207 208 def __bool__(self): 209 warnings.warn( 210 "The behavior of this method will change in future versions. " 211 "Use specific 'len(elem)' or 'elem is not None' test instead.", 212 FutureWarning, stacklevel=2 213 ) 214 return len(self._children) != 0 # emulate old behaviour, for now 215 216 def __getitem__(self, index): 217 return self._children[index] 218 219 def __setitem__(self, index, element): 220 # if isinstance(index, slice): 221 # for elt in element: 222 # assert iselement(elt) 223 # else: 224 # assert iselement(element) 225 self._children[index] = element 226 227 def __delitem__(self, index): 228 del self._children[index] 229 230 def append(self, subelement): 231 """Add *subelement* to the end of this element. 232 233 The new element will appear in document order after the last existing 234 subelement (or directly after the text, if it's the first subelement), 235 but before the end tag for this element. 236 237 """ 238 self._assert_is_element(subelement) 239 self._children.append(subelement) 240 241 def extend(self, elements): 242 """Append subelements from a sequence. 243 244 *elements* is a sequence with zero or more elements. 245 246 """ 247 for element in elements: 248 self._assert_is_element(element) 249 self._children.extend(elements) 250 251 def insert(self, index, subelement): 252 """Insert *subelement* at position *index*.""" 253 self._assert_is_element(subelement) 254 self._children.insert(index, subelement) 255 256 def _assert_is_element(self, e): 257 # Need to refer to the actual Python implementation, not the 258 # shadowing C implementation. 259 if not isinstance(e, _Element_Py): 260 raise TypeError('expected an Element, not %s' % type(e).__name__) 261 262 def remove(self, subelement): 263 """Remove matching subelement. 264 265 Unlike the find methods, this method compares elements based on 266 identity, NOT ON tag value or contents. To remove subelements by 267 other means, the easiest way is to use a list comprehension to 268 select what elements to keep, and then use slice assignment to update 269 the parent element. 270 271 ValueError is raised if a matching element could not be found. 272 273 """ 274 # assert iselement(element) 275 self._children.remove(subelement) 276 277 def getchildren(self): 278 """(Deprecated) Return all subelements. 279 280 Elements are returned in document order. 281 282 """ 283 warnings.warn( 284 "This method will be removed in future versions. " 285 "Use 'list(elem)' or iteration over elem instead.", 286 DeprecationWarning, stacklevel=2 287 ) 288 return self._children 289 290 def find(self, path, namespaces=None): 291 """Find first matching element by tag name or path. 292 293 *path* is a string having either an element tag or an XPath, 294 *namespaces* is an optional mapping from namespace prefix to full name. 295 296 Return the first matching element, or None if no element was found. 297 298 """ 299 return ElementPath.find(self, path, namespaces) 300 301 def findtext(self, path, default=None, namespaces=None): 302 """Find text for first matching element by tag name or path. 303 304 *path* is a string having either an element tag or an XPath, 305 *default* is the value to return if the element was not found, 306 *namespaces* is an optional mapping from namespace prefix to full name. 307 308 Return text content of first matching element, or default value if 309 none was found. Note that if an element is found having no text 310 content, the empty string is returned. 311 312 """ 313 return ElementPath.findtext(self, path, default, namespaces) 314 315 def findall(self, path, namespaces=None): 316 """Find all matching subelements by tag name or path. 317 318 *path* is a string having either an element tag or an XPath, 319 *namespaces* is an optional mapping from namespace prefix to full name. 320 321 Returns list containing all matching elements in document order. 322 323 """ 324 return ElementPath.findall(self, path, namespaces) 325 326 def iterfind(self, path, namespaces=None): 327 """Find all matching subelements by tag name or path. 328 329 *path* is a string having either an element tag or an XPath, 330 *namespaces* is an optional mapping from namespace prefix to full name. 331 332 Return an iterable yielding all matching elements in document order. 333 334 """ 335 return ElementPath.iterfind(self, path, namespaces) 336 337 def clear(self): 338 """Reset element. 339 340 This function removes all subelements, clears all attributes, and sets 341 the text and tail attributes to None. 342 343 """ 344 self.attrib.clear() 345 self._children = [] 346 self.text = self.tail = None 347 348 def get(self, key, default=None): 349 """Get element attribute. 350 351 Equivalent to attrib.get, but some implementations may handle this a 352 bit more efficiently. *key* is what attribute to look for, and 353 *default* is what to return if the attribute was not found. 354 355 Returns a string containing the attribute value, or the default if 356 attribute was not found. 357 358 """ 359 return self.attrib.get(key, default) 360 361 def set(self, key, value): 362 """Set element attribute. 363 364 Equivalent to attrib[key] = value, but some implementations may handle 365 this a bit more efficiently. *key* is what attribute to set, and 366 *value* is the attribute value to set it to. 367 368 """ 369 self.attrib[key] = value 370 371 def keys(self): 372 """Get list of attribute names. 373 374 Names are returned in an arbitrary order, just like an ordinary 375 Python dict. Equivalent to attrib.keys() 376 377 """ 378 return self.attrib.keys() 379 380 def items(self): 381 """Get element attributes as a sequence. 382 383 The attributes are returned in arbitrary order. Equivalent to 384 attrib.items(). 385 386 Return a list of (name, value) tuples. 387 388 """ 389 return self.attrib.items() 390 391 def iter(self, tag=None): 392 """Create tree iterator. 393 394 The iterator loops over the element and all subelements in document 395 order, returning all elements with a matching tag. 396 397 If the tree structure is modified during iteration, new or removed 398 elements may or may not be included. To get a stable set, use the 399 list() function on the iterator, and loop over the resulting list. 400 401 *tag* is what tags to look for (default is to return all elements) 402 403 Return an iterator containing all the matching elements. 404 405 """ 406 if tag == "*": 407 tag = None 408 if tag is None or self.tag == tag: 409 yield self 410 for e in self._children: 411 yield from e.iter(tag) 412 413 # compatibility 414 def getiterator(self, tag=None): 415 # Change for a DeprecationWarning in 1.4 416 warnings.warn( 417 "This method will be removed in future versions. " 418 "Use 'elem.iter()' or 'list(elem.iter())' instead.", 419 PendingDeprecationWarning, stacklevel=2 420 ) 421 return list(self.iter(tag)) 422 423 def itertext(self): 424 """Create text iterator. 425 426 The iterator loops over the element and all subelements in document 427 order, returning all inner text. 428 429 """ 430 tag = self.tag 431 if not isinstance(tag, str) and tag is not None: 432 return 433 t = self.text 434 if t: 435 yield t 436 for e in self: 437 yield from e.itertext() 438 t = e.tail 439 if t: 440 yield t 441 442 443def SubElement(parent, tag, attrib={}, **extra): 444 """Subelement factory which creates an element instance, and appends it 445 to an existing parent. 446 447 The element tag, attribute names, and attribute values can be either 448 bytes or Unicode strings. 449 450 *parent* is the parent element, *tag* is the subelements name, *attrib* is 451 an optional directory containing element attributes, *extra* are 452 additional attributes given as keyword arguments. 453 454 """ 455 attrib = attrib.copy() 456 attrib.update(extra) 457 element = parent.makeelement(tag, attrib) 458 parent.append(element) 459 return element 460 461 462def Comment(text=None): 463 """Comment element factory. 464 465 This function creates a special element which the standard serializer 466 serializes as an XML comment. 467 468 *text* is a string containing the comment string. 469 470 """ 471 element = Element(Comment) 472 element.text = text 473 return element 474 475 476def ProcessingInstruction(target, text=None): 477 """Processing Instruction element factory. 478 479 This function creates a special element which the standard serializer 480 serializes as an XML comment. 481 482 *target* is a string containing the processing instruction, *text* is a 483 string containing the processing instruction contents, if any. 484 485 """ 486 element = Element(ProcessingInstruction) 487 element.text = target 488 if text: 489 element.text = element.text + " " + text 490 return element 491 492PI = ProcessingInstruction 493 494 495class QName: 496 """Qualified name wrapper. 497 498 This class can be used to wrap a QName attribute value in order to get 499 proper namespace handing on output. 500 501 *text_or_uri* is a string containing the QName value either in the form 502 {uri}local, or if the tag argument is given, the URI part of a QName. 503 504 *tag* is an optional argument which if given, will make the first 505 argument (text_or_uri) be interpreted as a URI, and this argument (tag) 506 be interpreted as a local name. 507 508 """ 509 def __init__(self, text_or_uri, tag=None): 510 if tag: 511 text_or_uri = "{%s}%s" % (text_or_uri, tag) 512 self.text = text_or_uri 513 def __str__(self): 514 return self.text 515 def __repr__(self): 516 return '<%s %r>' % (self.__class__.__name__, self.text) 517 def __hash__(self): 518 return hash(self.text) 519 def __le__(self, other): 520 if isinstance(other, QName): 521 return self.text <= other.text 522 return self.text <= other 523 def __lt__(self, other): 524 if isinstance(other, QName): 525 return self.text < other.text 526 return self.text < other 527 def __ge__(self, other): 528 if isinstance(other, QName): 529 return self.text >= other.text 530 return self.text >= other 531 def __gt__(self, other): 532 if isinstance(other, QName): 533 return self.text > other.text 534 return self.text > other 535 def __eq__(self, other): 536 if isinstance(other, QName): 537 return self.text == other.text 538 return self.text == other 539 540# -------------------------------------------------------------------- 541 542 543class ElementTree: 544 """An XML element hierarchy. 545 546 This class also provides support for serialization to and from 547 standard XML. 548 549 *element* is an optional root element node, 550 *file* is an optional file handle or file name of an XML file whose 551 contents will be used to initialize the tree with. 552 553 """ 554 def __init__(self, element=None, file=None): 555 # assert element is None or iselement(element) 556 self._root = element # first node 557 if file: 558 self.parse(file) 559 560 def getroot(self): 561 """Return root element of this tree.""" 562 return self._root 563 564 def _setroot(self, element): 565 """Replace root element of this tree. 566 567 This will discard the current contents of the tree and replace it 568 with the given element. Use with care! 569 570 """ 571 # assert iselement(element) 572 self._root = element 573 574 def parse(self, source, parser=None): 575 """Load external XML document into element tree. 576 577 *source* is a file name or file object, *parser* is an optional parser 578 instance that defaults to XMLParser. 579 580 ParseError is raised if the parser fails to parse the document. 581 582 Returns the root element of the given source document. 583 584 """ 585 close_source = False 586 if not hasattr(source, "read"): 587 source = open(source, "rb") 588 close_source = True 589 try: 590 if parser is None: 591 # If no parser was specified, create a default XMLParser 592 parser = XMLParser() 593 if hasattr(parser, '_parse_whole'): 594 # The default XMLParser, when it comes from an accelerator, 595 # can define an internal _parse_whole API for efficiency. 596 # It can be used to parse the whole source without feeding 597 # it with chunks. 598 self._root = parser._parse_whole(source) 599 return self._root 600 while True: 601 data = source.read(65536) 602 if not data: 603 break 604 parser.feed(data) 605 self._root = parser.close() 606 return self._root 607 finally: 608 if close_source: 609 source.close() 610 611 def iter(self, tag=None): 612 """Create and return tree iterator for the root element. 613 614 The iterator loops over all elements in this tree, in document order. 615 616 *tag* is a string with the tag name to iterate over 617 (default is to return all elements). 618 619 """ 620 # assert self._root is not None 621 return self._root.iter(tag) 622 623 # compatibility 624 def getiterator(self, tag=None): 625 # Change for a DeprecationWarning in 1.4 626 warnings.warn( 627 "This method will be removed in future versions. " 628 "Use 'tree.iter()' or 'list(tree.iter())' instead.", 629 PendingDeprecationWarning, stacklevel=2 630 ) 631 return list(self.iter(tag)) 632 633 def find(self, path, namespaces=None): 634 """Find first matching element by tag name or path. 635 636 Same as getroot().find(path), which is Element.find() 637 638 *path* is a string having either an element tag or an XPath, 639 *namespaces* is an optional mapping from namespace prefix to full name. 640 641 Return the first matching element, or None if no element was found. 642 643 """ 644 # assert self._root is not None 645 if path[:1] == "/": 646 path = "." + path 647 warnings.warn( 648 "This search is broken in 1.3 and earlier, and will be " 649 "fixed in a future version. If you rely on the current " 650 "behaviour, change it to %r" % path, 651 FutureWarning, stacklevel=2 652 ) 653 return self._root.find(path, namespaces) 654 655 def findtext(self, path, default=None, namespaces=None): 656 """Find first matching element by tag name or path. 657 658 Same as getroot().findtext(path), which is Element.findtext() 659 660 *path* is a string having either an element tag or an XPath, 661 *namespaces* is an optional mapping from namespace prefix to full name. 662 663 Return the first matching element, or None if no element was found. 664 665 """ 666 # assert self._root is not None 667 if path[:1] == "/": 668 path = "." + path 669 warnings.warn( 670 "This search is broken in 1.3 and earlier, and will be " 671 "fixed in a future version. If you rely on the current " 672 "behaviour, change it to %r" % path, 673 FutureWarning, stacklevel=2 674 ) 675 return self._root.findtext(path, default, namespaces) 676 677 def findall(self, path, namespaces=None): 678 """Find all matching subelements by tag name or path. 679 680 Same as getroot().findall(path), which is Element.findall(). 681 682 *path* is a string having either an element tag or an XPath, 683 *namespaces* is an optional mapping from namespace prefix to full name. 684 685 Return list containing all matching elements in document order. 686 687 """ 688 # assert self._root is not None 689 if path[:1] == "/": 690 path = "." + path 691 warnings.warn( 692 "This search is broken in 1.3 and earlier, and will be " 693 "fixed in a future version. If you rely on the current " 694 "behaviour, change it to %r" % path, 695 FutureWarning, stacklevel=2 696 ) 697 return self._root.findall(path, namespaces) 698 699 def iterfind(self, path, namespaces=None): 700 """Find all matching subelements by tag name or path. 701 702 Same as getroot().iterfind(path), which is element.iterfind() 703 704 *path* is a string having either an element tag or an XPath, 705 *namespaces* is an optional mapping from namespace prefix to full name. 706 707 Return an iterable yielding all matching elements in document order. 708 709 """ 710 # assert self._root is not None 711 if path[:1] == "/": 712 path = "." + path 713 warnings.warn( 714 "This search is broken in 1.3 and earlier, and will be " 715 "fixed in a future version. If you rely on the current " 716 "behaviour, change it to %r" % path, 717 FutureWarning, stacklevel=2 718 ) 719 return self._root.iterfind(path, namespaces) 720 721 def write(self, file_or_filename, 722 encoding=None, 723 xml_declaration=None, 724 default_namespace=None, 725 method=None, *, 726 short_empty_elements=True): 727 """Write element tree to a file as XML. 728 729 Arguments: 730 *file_or_filename* -- file name or a file object opened for writing 731 732 *encoding* -- the output encoding (default: US-ASCII) 733 734 *xml_declaration* -- bool indicating if an XML declaration should be 735 added to the output. If None, an XML declaration 736 is added if encoding IS NOT either of: 737 US-ASCII, UTF-8, or Unicode 738 739 *default_namespace* -- sets the default XML namespace (for "xmlns") 740 741 *method* -- either "xml" (default), "html, "text", or "c14n" 742 743 *short_empty_elements* -- controls the formatting of elements 744 that contain no content. If True (default) 745 they are emitted as a single self-closed 746 tag, otherwise they are emitted as a pair 747 of start/end tags 748 749 """ 750 if not method: 751 method = "xml" 752 elif method not in _serialize: 753 raise ValueError("unknown method %r" % method) 754 if not encoding: 755 if method == "c14n": 756 encoding = "utf-8" 757 else: 758 encoding = "us-ascii" 759 enc_lower = encoding.lower() 760 with _get_writer(file_or_filename, enc_lower) as write: 761 if method == "xml" and (xml_declaration or 762 (xml_declaration is None and 763 enc_lower not in ("utf-8", "us-ascii", "unicode"))): 764 declared_encoding = encoding 765 if enc_lower == "unicode": 766 # Retrieve the default encoding for the xml declaration 767 import locale 768 declared_encoding = locale.getpreferredencoding() 769 write("<?xml version='1.0' encoding='%s'?>\n" % ( 770 declared_encoding,)) 771 if method == "text": 772 _serialize_text(write, self._root) 773 else: 774 qnames, namespaces = _namespaces(self._root, default_namespace) 775 serialize = _serialize[method] 776 serialize(write, self._root, qnames, namespaces, 777 short_empty_elements=short_empty_elements) 778 779 def write_c14n(self, file): 780 # lxml.etree compatibility. use output method instead 781 return self.write(file, method="c14n") 782 783# -------------------------------------------------------------------- 784# serialization support 785 786@contextlib.contextmanager 787def _get_writer(file_or_filename, encoding): 788 # returns text write method and release all resources after using 789 try: 790 write = file_or_filename.write 791 except AttributeError: 792 # file_or_filename is a file name 793 if encoding == "unicode": 794 file = open(file_or_filename, "w") 795 else: 796 file = open(file_or_filename, "w", encoding=encoding, 797 errors="xmlcharrefreplace") 798 with file: 799 yield file.write 800 else: 801 # file_or_filename is a file-like object 802 # encoding determines if it is a text or binary writer 803 if encoding == "unicode": 804 # use a text writer as is 805 yield write 806 else: 807 # wrap a binary writer with TextIOWrapper 808 with contextlib.ExitStack() as stack: 809 if isinstance(file_or_filename, io.BufferedIOBase): 810 file = file_or_filename 811 elif isinstance(file_or_filename, io.RawIOBase): 812 file = io.BufferedWriter(file_or_filename) 813 # Keep the original file open when the BufferedWriter is 814 # destroyed 815 stack.callback(file.detach) 816 else: 817 # This is to handle passed objects that aren't in the 818 # IOBase hierarchy, but just have a write method 819 file = io.BufferedIOBase() 820 file.writable = lambda: True 821 file.write = write 822 try: 823 # TextIOWrapper uses this methods to determine 824 # if BOM (for UTF-16, etc) should be added 825 file.seekable = file_or_filename.seekable 826 file.tell = file_or_filename.tell 827 except AttributeError: 828 pass 829 file = io.TextIOWrapper(file, 830 encoding=encoding, 831 errors="xmlcharrefreplace", 832 newline="\n") 833 # Keep the original file open when the TextIOWrapper is 834 # destroyed 835 stack.callback(file.detach) 836 yield file.write 837 838def _namespaces(elem, default_namespace=None): 839 # identify namespaces used in this tree 840 841 # maps qnames to *encoded* prefix:local names 842 qnames = {None: None} 843 844 # maps uri:s to prefixes 845 namespaces = {} 846 if default_namespace: 847 namespaces[default_namespace] = "" 848 849 def add_qname(qname): 850 # calculate serialized qname representation 851 try: 852 if qname[:1] == "{": 853 uri, tag = qname[1:].rsplit("}", 1) 854 prefix = namespaces.get(uri) 855 if prefix is None: 856 prefix = _namespace_map.get(uri) 857 if prefix is None: 858 prefix = "ns%d" % len(namespaces) 859 if prefix != "xml": 860 namespaces[uri] = prefix 861 if prefix: 862 qnames[qname] = "%s:%s" % (prefix, tag) 863 else: 864 qnames[qname] = tag # default element 865 else: 866 if default_namespace: 867 # FIXME: can this be handled in XML 1.0? 868 raise ValueError( 869 "cannot use non-qualified names with " 870 "default_namespace option" 871 ) 872 qnames[qname] = qname 873 except TypeError: 874 _raise_serialization_error(qname) 875 876 # populate qname and namespaces table 877 for elem in elem.iter(): 878 tag = elem.tag 879 if isinstance(tag, QName): 880 if tag.text not in qnames: 881 add_qname(tag.text) 882 elif isinstance(tag, str): 883 if tag not in qnames: 884 add_qname(tag) 885 elif tag is not None and tag is not Comment and tag is not PI: 886 _raise_serialization_error(tag) 887 for key, value in elem.items(): 888 if isinstance(key, QName): 889 key = key.text 890 if key not in qnames: 891 add_qname(key) 892 if isinstance(value, QName) and value.text not in qnames: 893 add_qname(value.text) 894 text = elem.text 895 if isinstance(text, QName) and text.text not in qnames: 896 add_qname(text.text) 897 return qnames, namespaces 898 899def _serialize_xml(write, elem, qnames, namespaces, 900 short_empty_elements, **kwargs): 901 tag = elem.tag 902 text = elem.text 903 if tag is Comment: 904 write("<!--%s-->" % text) 905 elif tag is ProcessingInstruction: 906 write("<?%s?>" % text) 907 else: 908 tag = qnames[tag] 909 if tag is None: 910 if text: 911 write(_escape_cdata(text)) 912 for e in elem: 913 _serialize_xml(write, e, qnames, None, 914 short_empty_elements=short_empty_elements) 915 else: 916 write("<" + tag) 917 items = list(elem.items()) 918 if items or namespaces: 919 if namespaces: 920 for v, k in sorted(namespaces.items(), 921 key=lambda x: x[1]): # sort on prefix 922 if k: 923 k = ":" + k 924 write(" xmlns%s=\"%s\"" % ( 925 k, 926 _escape_attrib(v) 927 )) 928 for k, v in sorted(items): # lexical order 929 if isinstance(k, QName): 930 k = k.text 931 if isinstance(v, QName): 932 v = qnames[v.text] 933 else: 934 v = _escape_attrib(v) 935 write(" %s=\"%s\"" % (qnames[k], v)) 936 if text or len(elem) or not short_empty_elements: 937 write(">") 938 if text: 939 write(_escape_cdata(text)) 940 for e in elem: 941 _serialize_xml(write, e, qnames, None, 942 short_empty_elements=short_empty_elements) 943 write("</" + tag + ">") 944 else: 945 write(" />") 946 if elem.tail: 947 write(_escape_cdata(elem.tail)) 948 949HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", 950 "img", "input", "isindex", "link", "meta", "param") 951 952try: 953 HTML_EMPTY = set(HTML_EMPTY) 954except NameError: 955 pass 956 957def _serialize_html(write, elem, qnames, namespaces, **kwargs): 958 tag = elem.tag 959 text = elem.text 960 if tag is Comment: 961 write("<!--%s-->" % _escape_cdata(text)) 962 elif tag is ProcessingInstruction: 963 write("<?%s?>" % _escape_cdata(text)) 964 else: 965 tag = qnames[tag] 966 if tag is None: 967 if text: 968 write(_escape_cdata(text)) 969 for e in elem: 970 _serialize_html(write, e, qnames, None) 971 else: 972 write("<" + tag) 973 items = list(elem.items()) 974 if items or namespaces: 975 if namespaces: 976 for v, k in sorted(namespaces.items(), 977 key=lambda x: x[1]): # sort on prefix 978 if k: 979 k = ":" + k 980 write(" xmlns%s=\"%s\"" % ( 981 k, 982 _escape_attrib(v) 983 )) 984 for k, v in sorted(items): # lexical order 985 if isinstance(k, QName): 986 k = k.text 987 if isinstance(v, QName): 988 v = qnames[v.text] 989 else: 990 v = _escape_attrib_html(v) 991 # FIXME: handle boolean attributes 992 write(" %s=\"%s\"" % (qnames[k], v)) 993 write(">") 994 ltag = tag.lower() 995 if text: 996 if ltag == "script" or ltag == "style": 997 write(text) 998 else: 999 write(_escape_cdata(text)) 1000 for e in elem: 1001 _serialize_html(write, e, qnames, None) 1002 if ltag not in HTML_EMPTY: 1003 write("</" + tag + ">") 1004 if elem.tail: 1005 write(_escape_cdata(elem.tail)) 1006 1007def _serialize_text(write, elem): 1008 for part in elem.itertext(): 1009 write(part) 1010 if elem.tail: 1011 write(elem.tail) 1012 1013_serialize = { 1014 "xml": _serialize_xml, 1015 "html": _serialize_html, 1016 "text": _serialize_text, 1017# this optional method is imported at the end of the module 1018# "c14n": _serialize_c14n, 1019} 1020 1021 1022def register_namespace(prefix, uri): 1023 """Register a namespace prefix. 1024 1025 The registry is global, and any existing mapping for either the 1026 given prefix or the namespace URI will be removed. 1027 1028 *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and 1029 attributes in this namespace will be serialized with prefix if possible. 1030 1031 ValueError is raised if prefix is reserved or is invalid. 1032 1033 """ 1034 if re.match(r"ns\d+$", prefix): 1035 raise ValueError("Prefix format reserved for internal use") 1036 for k, v in list(_namespace_map.items()): 1037 if k == uri or v == prefix: 1038 del _namespace_map[k] 1039 _namespace_map[uri] = prefix 1040 1041_namespace_map = { 1042 # "well-known" namespace prefixes 1043 "http://www.w3.org/XML/1998/namespace": "xml", 1044 "http://www.w3.org/1999/xhtml": "html", 1045 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", 1046 "http://schemas.xmlsoap.org/wsdl/": "wsdl", 1047 # xml schema 1048 "http://www.w3.org/2001/XMLSchema": "xs", 1049 "http://www.w3.org/2001/XMLSchema-instance": "xsi", 1050 # dublin core 1051 "http://purl.org/dc/elements/1.1/": "dc", 1052} 1053# For tests and troubleshooting 1054register_namespace._namespace_map = _namespace_map 1055 1056def _raise_serialization_error(text): 1057 raise TypeError( 1058 "cannot serialize %r (type %s)" % (text, type(text).__name__) 1059 ) 1060 1061def _escape_cdata(text): 1062 # escape character data 1063 try: 1064 # it's worth avoiding do-nothing calls for strings that are 1065 # shorter than 500 characters, or so. assume that's, by far, 1066 # the most common case in most applications. 1067 if "&" in text: 1068 text = text.replace("&", "&") 1069 if "<" in text: 1070 text = text.replace("<", "<") 1071 if ">" in text: 1072 text = text.replace(">", ">") 1073 return text 1074 except (TypeError, AttributeError): 1075 _raise_serialization_error(text) 1076 1077def _escape_attrib(text): 1078 # escape attribute value 1079 try: 1080 if "&" in text: 1081 text = text.replace("&", "&") 1082 if "<" in text: 1083 text = text.replace("<", "<") 1084 if ">" in text: 1085 text = text.replace(">", ">") 1086 if "\"" in text: 1087 text = text.replace("\"", """) 1088 # The following business with carriage returns is to satisfy 1089 # Section 2.11 of the XML specification, stating that 1090 # CR or CR LN should be replaced with just LN 1091 # http://www.w3.org/TR/REC-xml/#sec-line-ends 1092 if "\r\n" in text: 1093 text = text.replace("\r\n", "\n") 1094 if "\r" in text: 1095 text = text.replace("\r", "\n") 1096 #The following four lines are issue 17582 1097 if "\n" in text: 1098 text = text.replace("\n", " ") 1099 if "\t" in text: 1100 text = text.replace("\t", "	") 1101 return text 1102 except (TypeError, AttributeError): 1103 _raise_serialization_error(text) 1104 1105def _escape_attrib_html(text): 1106 # escape attribute value 1107 try: 1108 if "&" in text: 1109 text = text.replace("&", "&") 1110 if ">" in text: 1111 text = text.replace(">", ">") 1112 if "\"" in text: 1113 text = text.replace("\"", """) 1114 return text 1115 except (TypeError, AttributeError): 1116 _raise_serialization_error(text) 1117 1118# -------------------------------------------------------------------- 1119 1120def tostring(element, encoding=None, method=None, *, 1121 short_empty_elements=True): 1122 """Generate string representation of XML element. 1123 1124 All subelements are included. If encoding is "unicode", a string 1125 is returned. Otherwise a bytestring is returned. 1126 1127 *element* is an Element instance, *encoding* is an optional output 1128 encoding defaulting to US-ASCII, *method* is an optional output which can 1129 be one of "xml" (default), "html", "text" or "c14n". 1130 1131 Returns an (optionally) encoded string containing the XML data. 1132 1133 """ 1134 stream = io.StringIO() if encoding == 'unicode' else io.BytesIO() 1135 ElementTree(element).write(stream, encoding, method=method, 1136 short_empty_elements=short_empty_elements) 1137 return stream.getvalue() 1138 1139class _ListDataStream(io.BufferedIOBase): 1140 """An auxiliary stream accumulating into a list reference.""" 1141 def __init__(self, lst): 1142 self.lst = lst 1143 1144 def writable(self): 1145 return True 1146 1147 def seekable(self): 1148 return True 1149 1150 def write(self, b): 1151 self.lst.append(b) 1152 1153 def tell(self): 1154 return len(self.lst) 1155 1156def tostringlist(element, encoding=None, method=None, *, 1157 short_empty_elements=True): 1158 lst = [] 1159 stream = _ListDataStream(lst) 1160 ElementTree(element).write(stream, encoding, method=method, 1161 short_empty_elements=short_empty_elements) 1162 return lst 1163 1164 1165def dump(elem): 1166 """Write element tree or element structure to sys.stdout. 1167 1168 This function should be used for debugging only. 1169 1170 *elem* is either an ElementTree, or a single Element. The exact output 1171 format is implementation dependent. In this version, it's written as an 1172 ordinary XML file. 1173 1174 """ 1175 # debugging 1176 if not isinstance(elem, ElementTree): 1177 elem = ElementTree(elem) 1178 elem.write(sys.stdout, encoding="unicode") 1179 tail = elem.getroot().tail 1180 if not tail or tail[-1] != "\n": 1181 sys.stdout.write("\n") 1182 1183# -------------------------------------------------------------------- 1184# parsing 1185 1186 1187def parse(source, parser=None): 1188 """Parse XML document into element tree. 1189 1190 *source* is a filename or file object containing XML data, 1191 *parser* is an optional parser instance defaulting to XMLParser. 1192 1193 Return an ElementTree instance. 1194 1195 """ 1196 tree = ElementTree() 1197 tree.parse(source, parser) 1198 return tree 1199 1200 1201def iterparse(source, events=None, parser=None): 1202 """Incrementally parse XML document into ElementTree. 1203 1204 This class also reports what's going on to the user based on the 1205 *events* it is initialized with. The supported events are the strings 1206 "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get 1207 detailed namespace information). If *events* is omitted, only 1208 "end" events are reported. 1209 1210 *source* is a filename or file object containing XML data, *events* is 1211 a list of events to report back, *parser* is an optional parser instance. 1212 1213 Returns an iterator providing (event, elem) pairs. 1214 1215 """ 1216 # Use the internal, undocumented _parser argument for now; When the 1217 # parser argument of iterparse is removed, this can be killed. 1218 pullparser = XMLPullParser(events=events, _parser=parser) 1219 def iterator(): 1220 try: 1221 while True: 1222 yield from pullparser.read_events() 1223 # load event buffer 1224 data = source.read(16 * 1024) 1225 if not data: 1226 break 1227 pullparser.feed(data) 1228 root = pullparser._close_and_return_root() 1229 yield from pullparser.read_events() 1230 it.root = root 1231 finally: 1232 if close_source: 1233 source.close() 1234 1235 class IterParseIterator(collections.abc.Iterator): 1236 __next__ = iterator().__next__ 1237 it = IterParseIterator() 1238 it.root = None 1239 del iterator, IterParseIterator 1240 1241 close_source = False 1242 if not hasattr(source, "read"): 1243 source = open(source, "rb") 1244 close_source = True 1245 1246 return it 1247 1248 1249class XMLPullParser: 1250 1251 def __init__(self, events=None, *, _parser=None): 1252 # The _parser argument is for internal use only and must not be relied 1253 # upon in user code. It will be removed in a future release. 1254 # See http://bugs.python.org/issue17741 for more details. 1255 1256 self._events_queue = collections.deque() 1257 self._parser = _parser or XMLParser(target=TreeBuilder()) 1258 # wire up the parser for event reporting 1259 if events is None: 1260 events = ("end",) 1261 self._parser._setevents(self._events_queue, events) 1262 1263 def feed(self, data): 1264 """Feed encoded data to parser.""" 1265 if self._parser is None: 1266 raise ValueError("feed() called after end of stream") 1267 if data: 1268 try: 1269 self._parser.feed(data) 1270 except SyntaxError as exc: 1271 self._events_queue.append(exc) 1272 1273 def _close_and_return_root(self): 1274 # iterparse needs this to set its root attribute properly :( 1275 root = self._parser.close() 1276 self._parser = None 1277 return root 1278 1279 def close(self): 1280 """Finish feeding data to parser. 1281 1282 Unlike XMLParser, does not return the root element. Use 1283 read_events() to consume elements from XMLPullParser. 1284 """ 1285 self._close_and_return_root() 1286 1287 def read_events(self): 1288 """Return an iterator over currently available (event, elem) pairs. 1289 1290 Events are consumed from the internal event queue as they are 1291 retrieved from the iterator. 1292 """ 1293 events = self._events_queue 1294 while events: 1295 event = events.popleft() 1296 if isinstance(event, Exception): 1297 raise event 1298 else: 1299 yield event 1300 1301 1302def XML(text, parser=None): 1303 """Parse XML document from string constant. 1304 1305 This function can be used to embed "XML Literals" in Python code. 1306 1307 *text* is a string containing XML data, *parser* is an 1308 optional parser instance, defaulting to the standard XMLParser. 1309 1310 Returns an Element instance. 1311 1312 """ 1313 if not parser: 1314 parser = XMLParser(target=TreeBuilder()) 1315 parser.feed(text) 1316 return parser.close() 1317 1318 1319def XMLID(text, parser=None): 1320 """Parse XML document from string constant for its IDs. 1321 1322 *text* is a string containing XML data, *parser* is an 1323 optional parser instance, defaulting to the standard XMLParser. 1324 1325 Returns an (Element, dict) tuple, in which the 1326 dict maps element id:s to elements. 1327 1328 """ 1329 if not parser: 1330 parser = XMLParser(target=TreeBuilder()) 1331 parser.feed(text) 1332 tree = parser.close() 1333 ids = {} 1334 for elem in tree.iter(): 1335 id = elem.get("id") 1336 if id: 1337 ids[id] = elem 1338 return tree, ids 1339 1340# Parse XML document from string constant. Alias for XML(). 1341fromstring = XML 1342 1343def fromstringlist(sequence, parser=None): 1344 """Parse XML document from sequence of string fragments. 1345 1346 *sequence* is a list of other sequence, *parser* is an optional parser 1347 instance, defaulting to the standard XMLParser. 1348 1349 Returns an Element instance. 1350 1351 """ 1352 if not parser: 1353 parser = XMLParser(target=TreeBuilder()) 1354 for text in sequence: 1355 parser.feed(text) 1356 return parser.close() 1357 1358# -------------------------------------------------------------------- 1359 1360 1361class TreeBuilder: 1362 """Generic element structure builder. 1363 1364 This builder converts a sequence of start, data, and end method 1365 calls to a well-formed element structure. 1366 1367 You can use this class to build an element structure using a custom XML 1368 parser, or a parser for some other XML-like format. 1369 1370 *element_factory* is an optional element factory which is called 1371 to create new Element instances, as necessary. 1372 1373 """ 1374 def __init__(self, element_factory=None): 1375 self._data = [] # data collector 1376 self._elem = [] # element stack 1377 self._last = None # last element 1378 self._tail = None # true if we're after an end tag 1379 if element_factory is None: 1380 element_factory = Element 1381 self._factory = element_factory 1382 1383 def close(self): 1384 """Flush builder buffers and return toplevel document Element.""" 1385 assert len(self._elem) == 0, "missing end tags" 1386 assert self._last is not None, "missing toplevel element" 1387 return self._last 1388 1389 def _flush(self): 1390 if self._data: 1391 if self._last is not None: 1392 text = "".join(self._data) 1393 if self._tail: 1394 assert self._last.tail is None, "internal error (tail)" 1395 self._last.tail = text 1396 else: 1397 assert self._last.text is None, "internal error (text)" 1398 self._last.text = text 1399 self._data = [] 1400 1401 def data(self, data): 1402 """Add text to current element.""" 1403 self._data.append(data) 1404 1405 def start(self, tag, attrs): 1406 """Open new element and return it. 1407 1408 *tag* is the element name, *attrs* is a dict containing element 1409 attributes. 1410 1411 """ 1412 self._flush() 1413 self._last = elem = self._factory(tag, attrs) 1414 if self._elem: 1415 self._elem[-1].append(elem) 1416 self._elem.append(elem) 1417 self._tail = 0 1418 return elem 1419 1420 def end(self, tag): 1421 """Close and return current Element. 1422 1423 *tag* is the element name. 1424 1425 """ 1426 self._flush() 1427 self._last = self._elem.pop() 1428 assert self._last.tag == tag,\ 1429 "end tag mismatch (expected %s, got %s)" % ( 1430 self._last.tag, tag) 1431 self._tail = 1 1432 return self._last 1433 1434_sentinel = ['sentinel'] 1435 1436# also see ElementTree and TreeBuilder 1437class XMLParser: 1438 """Element structure builder for XML source data based on the expat parser. 1439 1440 *html* are predefined HTML entities (deprecated and not supported), 1441 *target* is an optional target object which defaults to an instance of the 1442 standard TreeBuilder class, *encoding* is an optional encoding string 1443 which if given, overrides the encoding specified in the XML file: 1444 http://www.iana.org/assignments/character-sets 1445 1446 """ 1447 1448 def __init__(self, html=_sentinel, target=None, encoding=None): 1449 if html is not _sentinel: 1450 warnings.warn( 1451 "The html argument of XMLParser() is deprecated", 1452 DeprecationWarning, stacklevel=2) 1453 try: 1454 from xml.parsers import expat 1455 except ImportError: 1456 try: 1457 import pyexpat as expat 1458 except ImportError: 1459 raise ImportError( 1460 "No module named expat; use SimpleXMLTreeBuilder instead" 1461 ) 1462 parser = expat.ParserCreate(encoding, "}") 1463 if target is None: 1464 target = TreeBuilder() 1465 # underscored names are provided for compatibility only 1466 self.parser = self._parser = parser 1467 self.target = self._target = target 1468 self._error = expat.error 1469 self._names = {} # name memo cache 1470 # main callbacks 1471 parser.DefaultHandlerExpand = self._default 1472 if hasattr(target, 'start'): 1473 parser.StartElementHandler = self._start 1474 if hasattr(target, 'end'): 1475 parser.EndElementHandler = self._end 1476 if hasattr(target, 'data'): 1477 parser.CharacterDataHandler = target.data 1478 # miscellaneous callbacks 1479 if hasattr(target, 'comment'): 1480 parser.CommentHandler = target.comment 1481 if hasattr(target, 'pi'): 1482 parser.ProcessingInstructionHandler = target.pi 1483 # Configure pyexpat: buffering, new-style attribute handling. 1484 parser.buffer_text = 1 1485 parser.ordered_attributes = 1 1486 parser.specified_attributes = 1 1487 self._doctype = None 1488 self.entity = {} 1489 try: 1490 self.version = "Expat %d.%d.%d" % expat.version_info 1491 except AttributeError: 1492 pass # unknown 1493 1494 def _setevents(self, events_queue, events_to_report): 1495 # Internal API for XMLPullParser 1496 # events_to_report: a list of events to report during parsing (same as 1497 # the *events* of XMLPullParser's constructor. 1498 # events_queue: a list of actual parsing events that will be populated 1499 # by the underlying parser. 1500 # 1501 parser = self._parser 1502 append = events_queue.append 1503 for event_name in events_to_report: 1504 if event_name == "start": 1505 parser.ordered_attributes = 1 1506 parser.specified_attributes = 1 1507 def handler(tag, attrib_in, event=event_name, append=append, 1508 start=self._start): 1509 append((event, start(tag, attrib_in))) 1510 parser.StartElementHandler = handler 1511 elif event_name == "end": 1512 def handler(tag, event=event_name, append=append, 1513 end=self._end): 1514 append((event, end(tag))) 1515 parser.EndElementHandler = handler 1516 elif event_name == "start-ns": 1517 def handler(prefix, uri, event=event_name, append=append): 1518 append((event, (prefix or "", uri or ""))) 1519 parser.StartNamespaceDeclHandler = handler 1520 elif event_name == "end-ns": 1521 def handler(prefix, event=event_name, append=append): 1522 append((event, None)) 1523 parser.EndNamespaceDeclHandler = handler 1524 else: 1525 raise ValueError("unknown event %r" % event_name) 1526 1527 def _raiseerror(self, value): 1528 err = ParseError(value) 1529 err.code = value.code 1530 err.position = value.lineno, value.offset 1531 raise err 1532 1533 def _fixname(self, key): 1534 # expand qname, and convert name string to ascii, if possible 1535 try: 1536 name = self._names[key] 1537 except KeyError: 1538 name = key 1539 if "}" in name: 1540 name = "{" + name 1541 self._names[key] = name 1542 return name 1543 1544 def _start(self, tag, attr_list): 1545 # Handler for expat's StartElementHandler. Since ordered_attributes 1546 # is set, the attributes are reported as a list of alternating 1547 # attribute name,value. 1548 fixname = self._fixname 1549 tag = fixname(tag) 1550 attrib = {} 1551 if attr_list: 1552 for i in range(0, len(attr_list), 2): 1553 attrib[fixname(attr_list[i])] = attr_list[i+1] 1554 return self.target.start(tag, attrib) 1555 1556 def _end(self, tag): 1557 return self.target.end(self._fixname(tag)) 1558 1559 def _default(self, text): 1560 prefix = text[:1] 1561 if prefix == "&": 1562 # deal with undefined entities 1563 try: 1564 data_handler = self.target.data 1565 except AttributeError: 1566 return 1567 try: 1568 data_handler(self.entity[text[1:-1]]) 1569 except KeyError: 1570 from xml.parsers import expat 1571 err = expat.error( 1572 "undefined entity %s: line %d, column %d" % 1573 (text, self.parser.ErrorLineNumber, 1574 self.parser.ErrorColumnNumber) 1575 ) 1576 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY 1577 err.lineno = self.parser.ErrorLineNumber 1578 err.offset = self.parser.ErrorColumnNumber 1579 raise err 1580 elif prefix == "<" and text[:9] == "<!DOCTYPE": 1581 self._doctype = [] # inside a doctype declaration 1582 elif self._doctype is not None: 1583 # parse doctype contents 1584 if prefix == ">": 1585 self._doctype = None 1586 return 1587 text = text.strip() 1588 if not text: 1589 return 1590 self._doctype.append(text) 1591 n = len(self._doctype) 1592 if n > 2: 1593 type = self._doctype[1] 1594 if type == "PUBLIC" and n == 4: 1595 name, type, pubid, system = self._doctype 1596 if pubid: 1597 pubid = pubid[1:-1] 1598 elif type == "SYSTEM" and n == 3: 1599 name, type, system = self._doctype 1600 pubid = None 1601 else: 1602 return 1603 if hasattr(self.target, "doctype"): 1604 self.target.doctype(name, pubid, system[1:-1]) 1605 elif self.doctype != self._XMLParser__doctype: 1606 # warn about deprecated call 1607 self._XMLParser__doctype(name, pubid, system[1:-1]) 1608 self.doctype(name, pubid, system[1:-1]) 1609 self._doctype = None 1610 1611 def doctype(self, name, pubid, system): 1612 """(Deprecated) Handle doctype declaration 1613 1614 *name* is the Doctype name, *pubid* is the public identifier, 1615 and *system* is the system identifier. 1616 1617 """ 1618 warnings.warn( 1619 "This method of XMLParser is deprecated. Define doctype() " 1620 "method on the TreeBuilder target.", 1621 DeprecationWarning, 1622 ) 1623 1624 # sentinel, if doctype is redefined in a subclass 1625 __doctype = doctype 1626 1627 def feed(self, data): 1628 """Feed encoded data to parser.""" 1629 try: 1630 self.parser.Parse(data, 0) 1631 except self._error as v: 1632 self._raiseerror(v) 1633 1634 def close(self): 1635 """Finish feeding data to parser and return element structure.""" 1636 try: 1637 self.parser.Parse("", 1) # end of data 1638 except self._error as v: 1639 self._raiseerror(v) 1640 try: 1641 close_handler = self.target.close 1642 except AttributeError: 1643 pass 1644 else: 1645 return close_handler() 1646 finally: 1647 # get rid of circular references 1648 del self.parser, self._parser 1649 del self.target, self._target 1650 1651 1652# Import the C accelerators 1653try: 1654 # Element is going to be shadowed by the C implementation. We need to keep 1655 # the Python version of it accessible for some "creative" by external code 1656 # (see tests) 1657 _Element_Py = Element 1658 1659 # Element, SubElement, ParseError, TreeBuilder, XMLParser 1660 from _elementtree import * 1661except ImportError: 1662 pass 1663