1"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
2should be based on this code. """
3
4import handler
5
6from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
7
8
9# ===== XMLREADER =====
10
class XMLReader:
    """Callback-driven interface for reading an XML document.

    A SAX2 driver for an XML parser implements this interface. Through
    it an application can query and change parser features and
    properties, install the handlers that receive document events, and
    start a parse.

    SAX interfaces are taken to be synchronous: a parse method does not
    return before parsing has finished, and the reader waits for each
    event-handler callback to return before it reports the next event."""

    def __init__(self):
        # Each handler slot starts out holding a default object built
        # from the handler module; the setters below replace them.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document given as a system identifier or an
        InputSource.

        This base implementation always raises NotImplementedError;
        concrete readers must override it."""
        raise NotImplementedError("This method must be implemented!")

    # ----- handler accessors -----

    def getContentHandler(self):
        """Return the object currently registered for content events."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Install handler as the receiver of document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the object currently registered for DTD events."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Install handler as the receiver of basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the object currently registered as entity resolver."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Install resolver as the object that resolves external
        entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the object currently registered for error events."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Install handler as the receiver of error-message events."""
        self._err_handler = handler

    # ----- localization, features and properties -----

    def setLocale(self, locale):
        """Request a locale for error and warning messages.

        Localization is optional for SAX parsers, but a parser that
        cannot honour the requested locale must raise a SAX exception.
        A locale change may be requested in the middle of a parse.

        This base implementation supports no locale and always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up the state of the SAX2 feature called name.

        No feature is known to this base class, so
        SAXNotRecognizedException is always raised."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the SAX2 feature called name to state.

        No feature is known to this base class, so
        SAXNotRecognizedException is always raised."""
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up the value of the SAX2 property called name.

        No property is known to this base class, so
        SAXNotRecognizedException is always raised."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the SAX2 property called name to value.

        No property is known to this base class, so
        SAXNotRecognizedException is always raised."""
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
90
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """Create the parser.

        bufsize is the number of bytes requested from the input stream
        by each read issued from parse (default 2**16)."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse a complete document by driving feed and close.

        source is first passed through saxutils.prepare_input_source;
        prepareParser is then called with the result, and the result's
        byte stream is read in chunks of at most self._bufsize and
        handed to feed one chunk at a time. close is called once the
        stream is exhausted.

        Exceptions raised by prepareParser, feed, close or the stream
        itself propagate to the caller unchanged."""
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # End of input is detected by truthiness rather than by equality
        # with "": a stream that signals EOF with b"", u"" or None then
        # ends the loop instead of spinning forever or reaching feed.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing.

        source is the object returned by
        saxutils.prepare_input_source. This base implementation always
        raises NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")
160
161# ===== LOCATOR =====
162
class Locator:
    """Interface that ties a SAX event to a position in the document.

    Results from a locator are only meaningful while a DocumentHandler
    method is being called; what it returns at any other time is
    unpredictable.

    This base class knows no position: the numeric queries return -1
    and the identifier queries return None."""

    def getColumnNumber(self):
        """Return the column number at which the current event ends.

        This default implementation returns -1."""
        return -1

    def getLineNumber(self):
        """Return the line number at which the current event ends.

        This default implementation returns -1."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event.

        This default implementation returns None."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        This default implementation returns None."""
        return None
184
185# ===== INPUTSOURCE =====
186
class InputSource:
    """Bundle of everything an XMLReader needs in order to read an
    entity.

    An instance can carry a public identifier, a system identifier, a
    byte stream (optionally with a character encoding) and/or a
    character stream.

    Applications build these objects to pass to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    The object belongs to the application: an XMLReader must not modify
    an InputSource it was given, though it may copy one and modify the
    copy."""

    def __init__(self, system_id = None):
        # Only the system identifier can be supplied at construction
        # time; every other field starts out unset.
        self.__public_id = None
        self.__system_id = system_id
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    # ----- identifiers -----

    def setPublicId(self, public_id):
        """Store public_id as the public identifier."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the stored public identifier (None if never set)."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Store system_id as the system identifier."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the stored system identifier."""
        return self.__system_id

    # ----- encoding -----

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        The value must be a string that would be acceptable in an XML
        encoding declaration (section 4.3.3 of the XML
        recommendation).

        When the InputSource also holds a character stream, the
        encoding is ignored."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the stored character encoding (None if never set)."""
        return self.__encoding

    # ----- streams -----

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source.

        bytefile is a Python file-like object that does not convert
        bytes to characters.

        A SAX parser ignores the byte stream when a character stream
        is also present, but prefers it over opening a URI connection
        on its own.

        An application that knows the stream's character encoding
        should also call setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the stored byte stream (None if never set).

        getEncoding gives the character encoding of this stream, or
        None when it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source.

        charfile must be a Python 2.0 Unicode-wrapped file-like object
        that performs the conversion to Unicode strings.

        Once a character stream is present, a SAX parser ignores any
        byte stream and makes no attempt to open a URI connection to
        the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the stored character stream (None if never set)."""
        return self.__charfile
273
274# ===== ATTRIBUTESIMPL =====
275
class AttributesImpl:
    """Read-only, mapping-like view of an element's attributes
    (non-namespace-aware).

    The object wraps a {name: value} mapping. Because there is no
    namespace processing here, an attribute's name and its qualified
    name are the same string, so the *ByQName methods behave exactly
    like their plain-name counterparts."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        # The mapping is stored by reference, not copied.
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA".

        name is not looked up, so no error is raised for an unknown
        attribute."""
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute name; raise KeyError if it
        is absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name name; raise KeyError
        if it is absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the attribute name for qualified name name.

        The two coincide in this implementation, so name itself is
        returned once it is confirmed to exist. Raises KeyError
        otherwise."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return the qualified name for attribute name.

        The two coincide in this implementation, so name itself is
        returned once it is confirmed to exist. Raises KeyError
        otherwise."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return the attribute names (the keys of the wrapped
        mapping)."""
        return self._attrs.keys()

    def getQNames(self):
        """Return the qualified names, which here are the same as the
        attribute names."""
        return self._attrs.keys()

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return the attribute names, as the wrapped mapping's keys()
        does."""
        return self._attrs.keys()

    def has_key(self, name):
        """Return True if an attribute called name exists."""
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of attribute name, or alternative when it
        is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class over the same
        attributes.

        The wrapped mapping itself is passed on, not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        """Return the (name, value) pairs of the wrapped mapping."""
        return self._attrs.items()

    def values(self):
        """Return the attribute values of the wrapped mapping."""
        return self._attrs.values()
338
339# ===== ATTRIBUTESNSIMPL =====
340
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl.

    Attribute names are (ns_uri, lname) pairs. A second mapping
    translates each such pair to its qualified name, and the
    qname-based lookups search that mapping."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        # Both mappings are stored by reference, not copied.
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is
        name.

        The qname mapping is scanned for an entry equal to name.
        Raises KeyError if no entry matches."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair whose qualified name is
        name.

        The qname mapping is scanned for an entry equal to name.
        Raises KeyError if no entry matches."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name stored for the (ns_uri, lname)
        pair name; raise KeyError if the pair is unknown."""
        return self._qnames[name]

    def getQNames(self):
        """Return the qualified names (the values of the qname
        mapping)."""
        return self._qnames.values()

    def copy(self):
        """Return a new object of the same class over the same
        attributes.

        Both wrapped mappings are passed on, not duplicated."""
        return self.__class__(self._attrs, self._qnames)
373
374
def _test():
    """Smoke test: instantiate each class that can be constructed
    without arguments and discard the result."""
    for cls in (XMLReader, IncrementalParser, Locator):
        cls()

# Run the smoke test when this file is executed as a script.
if __name__ == "__main__":
    _test()
382