1 /* -*-             c-basic-offset: 4; indent-tabs-mode: nil; -*-  //------100-columns-wide------>|*/
2 // for license please see accompanying LICENSE.txt file (available also at http://www.xmlpull.org/)
3 
4 package org.xmlpull.v1;
5 
6 import java.io.InputStream;
7 import java.io.IOException;
8 import java.io.Reader;
9 
10 /**
11  * XML Pull Parser is an interface that defines parsing functionality provided
12  * in <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to
13  * learn more about API and its implementations).
14  *
15  * <p>There are the following different
16  * kinds of parser, depending on which features are set:<ul>
17  * <li><b>non-validating</b> parser as defined in XML 1.0 spec when
18  *   FEATURE_PROCESS_DOCDECL is set to true
19  * <li><b>validating parser</b> as defined in XML 1.0 spec when
20  *   FEATURE_VALIDATION is true (and that implies that FEATURE_PROCESS_DOCDECL is true)
21  * <li>when FEATURE_PROCESS_DOCDECL is false (this is default and
22  *   if a different value is required, it must be changed before parsing is started)
23  *   then parser behaves like XML 1.0 compliant non-validating parser under condition that
24  *  <em>no DOCDECL is present</em> in XML documents
25  *   (internal entities can still be defined with defineEntityReplacementText()).
26  *   This mode of operation is intended <b>for operation in constrained environments</b> such as J2ME.
27  * </ul>
28  *
29  *
30  * <p>There are two key methods: next() and nextToken(). While next() provides
31  * access to high level parsing events, nextToken() allows access to lower
32  * level tokens.
33  *
34  * <p>The current event state of the parser
35  * can be determined by calling the
36  * <a href="#getEventType()">getEventType()</a> method.
37  * Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a>
38  * state.
39  *
40  * <p>The method <a href="#next()">next()</a> advances the parser to the
41  * next event. The int value returned from next determines the current parser
42  * state and is identical to the value returned from following calls to
43  * getEventType ().
44  *
45  * <p>The following event types are seen by next()<dl>
46  * <dt><a href="#START_TAG">START_TAG</a><dd> An XML start tag was read.
47  * <dt><a href="#TEXT">TEXT</a><dd> Text content was read;
48  * the text content can be retrieved using the getText() method.
49  *  (when in validating mode next() will not report ignorable whitespace, use nextToken() instead)
50  * <dt><a href="#END_TAG">END_TAG</a><dd> An end tag was read
51  * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a><dd> No more events are available
52  * </dl>
53  *
54  * <p>After the first next() or nextToken() (or any other next*() method)
55  * is called, the user application can obtain the
56  * XML version, standalone and encoding from the XML declaration
57  * in the following ways:<ul>
58  * <li><b>version</b>:
59  *  getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
60  *       returns String ("1.0") or null if XMLDecl was not read or if property is not supported
61  * <li><b>standalone</b>:
62  *  getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone">http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone</a>&quot;)
63  *       returns Boolean: null if there was no standalone declaration
64  *  or if property is not supported
65  *         otherwise returns Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
66  * <li><b>encoding</b>: obtained from getInputEncoding()
67  *       null if stream had unknown encoding (not set in setInputStream)
68  *           and it was not declared in XMLDecl
69  * </ul>
70  *
71  * A minimal example for using this API may look as follows:
72  * <pre>
73  * import java.io.IOException;
74  * import java.io.StringReader;
75  *
76  * import org.xmlpull.v1.XmlPullParser;
77  * import org.xmlpull.v1.<a href="XmlPullParserException.html">XmlPullParserException</a>;
78  * import org.xmlpull.v1.<a href="XmlPullParserFactory.html">XmlPullParserFactory</a>;
79  *
80  * public class SimpleXmlPullApp
81  * {
82  *
83  *     public static void main (String args[])
84  *         throws XmlPullParserException, IOException
85  *     {
86  *         XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
87  *         factory.setNamespaceAware(true);
88  *         XmlPullParser xpp = factory.newPullParser();
89  *
90  *         xpp.<a href="#setInput">setInput</a>( new StringReader ( "&lt;foo>Hello World!&lt;/foo>" ) );
91  *         int eventType = xpp.getEventType();
92  *         while (eventType != XmlPullParser.END_DOCUMENT) {
93  *          if(eventType == XmlPullParser.START_DOCUMENT) {
94  *              System.out.println("Start document");
95  *          } else if(eventType == XmlPullParser.START_TAG) {
96  *              System.out.println("Start tag "+xpp.<a href="#getName()">getName()</a>);
97  *          } else if(eventType == XmlPullParser.END_TAG) {
98  *              System.out.println("End tag "+xpp.getName());
99  *          } else if(eventType == XmlPullParser.TEXT) {
100  *              System.out.println("Text "+xpp.<a href="#getText()">getText()</a>);
101  *          }
102  *          eventType = xpp.next();
103  *         }
104  *         System.out.println("End document");
105  *     }
106  * }
107  * </pre>
108  *
109  * <p>The above example will generate the following output:
110  * <pre>
111  * Start document
112  * Start tag foo
113  * Text Hello World!
114  * End tag foo
115  * End document
116  * </pre>
117  *
118  * <p>For more details on API usage, please refer to the
119  * quick Introduction available at <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
120  *
121  * @see XmlPullParserFactory
122  * @see #defineEntityReplacementText
123  * @see #getName
124  * @see #getNamespace
125  * @see #getText
126  * @see #next
127  * @see #nextToken
128  * @see #setInput
129  * @see #FEATURE_PROCESS_DOCDECL
130  * @see #FEATURE_VALIDATION
131  * @see #START_DOCUMENT
132  * @see #START_TAG
133  * @see #TEXT
134  * @see #END_TAG
135  * @see #END_DOCUMENT
136  *
137  * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
138  * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
139  */
140 
141 public interface XmlPullParser {
142 
143     /** Represents the default namespace, which is the empty string "". */
144     String NO_NAMESPACE = "";
145 
146     // ----------------------------------------------------------------------------
147     // EVENT TYPES as reported by next()
148 
149     /**
150      * Signals that the parser is at the very beginning of the document
151      * and nothing has been read yet.
152      * This event type can only be observed by calling getEventType()
153      * before the first call to next(), nextToken(), or nextTag().
154      *
155      * @see #next
156      * @see #nextToken
157      */
158     int START_DOCUMENT = 0;
159 
160     /**
161      * Logical end of the xml document. Returned from getEventType(), next()
162      * and nextToken()
163      * when the end of the input document has been reached.
164      * <p><strong>NOTE:</strong> subsequent calls to
165      * <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a>
166      * may result in an exception being thrown.
167      *
168      * @see #next
169      * @see #nextToken
170      */
171     int END_DOCUMENT = 1;
172 
173     /**
174      * Returned from getEventType(),
175      * <a href="#next()">next()</a>, <a href="#nextToken()">nextToken()</a> when
176      * a start tag was read.
177      * The name of the start tag is available from getName(); its namespace and prefix are
178      * available from getNamespace() and getPrefix()
179      * if <a href='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>.
180      * See getAttribute* methods to retrieve element attributes.
181      * See getNamespace* methods to retrieve newly declared namespaces.
182      *
183      * @see #next
184      * @see #nextToken
185      * @see #getName
186      * @see #getPrefix
187      * @see #getNamespace
188      * @see #getAttributeCount
189      * @see #getDepth
190      * @see #getNamespaceCount
191      * @see #getNamespace
192      * @see #FEATURE_PROCESS_NAMESPACES
193      */
194     int START_TAG = 2;
195 
196     /**
197      * Returned from getEventType(), <a href="#next()">next()</a>, or
198      * <a href="#nextToken()">nextToken()</a> when an end tag was read.
199      * The name of the end tag is available from getName(); its
200      * namespace and prefix are
201      * available from getNamespace() and getPrefix().
202      *
203      * @see #next
204      * @see #nextToken
205      * @see #getName
206      * @see #getPrefix
207      * @see #getNamespace
208      * @see #FEATURE_PROCESS_NAMESPACES
209      */
210     int END_TAG = 3;
211 
212 
213     /**
214      * Character data was read and is available by calling getText().
215      * <p><strong>Please note:</strong> <a href="#next()">next()</a> will
216      * accumulate multiple
217      * events into one TEXT event, skipping IGNORABLE_WHITESPACE,
218      * PROCESSING_INSTRUCTION and COMMENT events.
219      * In contrast, <a href="#nextToken()">nextToken()</a> will stop reading
220      * text when any other event is observed.
221      * Also, when the state was reached by calling next(), the text value will
222      * be normalized, whereas getText() will
223      * return unnormalized content in the case of nextToken(). This allows
224      * an exact roundtrip without changing line ends when examining low
225      * level events, whereas for high level applications the text is
226      * normalized appropriately.
227      *
228      * @see #next
229      * @see #nextToken
230      * @see #getText
231      */
232     int TEXT = 4;
233 
234     // ----------------------------------------------------------------------------
235     // additional events exposed by lower level nextToken()
236 
237     /**
238      * A CDATA section was just read;
239      * this token is available only from calls to <a href="#nextToken()">nextToken()</a>.
240      * A call to next() will accumulate various text events into a single event
241      * of type TEXT. The text contained in the CDATA section is available
242      * by calling getText().
243      *
244      * @see #nextToken
245      * @see #getText
246      */
247     int CDSECT = 5;
248 
249     /**
250      * An entity reference was just read;
251      * this token is available from <a href="#nextToken()">nextToken()</a>
252      * only. The entity name is available by calling getName(). If available,
253      * the replacement text can be obtained by calling getText(); otherwise,
254      * the user is responsible for resolving the entity reference.
255      * This event type is never returned from next(); next() will
256      * accumulate the replacement text and other text
257      * events to a single TEXT event.
258      *
259      * @see #nextToken
         * @see #getName
260      * @see #getText
261      */
262     int ENTITY_REF = 6;
263 
264     /**
265      * Ignorable whitespace was just read.
266      * This token is available only from <a href="#nextToken()">nextToken()</a>.
267      * For non-validating
268      * parsers, this event is only reported by nextToken() when outside
269      * the root element.
270      * Validating parsers may be able to detect ignorable whitespace at
271      * other locations.
272      * The ignorable whitespace string is available by calling getText().
273      *
274      * <p><strong>NOTE:</strong> this is different from calling the
275      *  isWhitespace() method, since text content
276      *  may be whitespace but not ignorable.
277      *
278      * <p>Ignorable whitespace is skipped by next() automatically; this event
279      * type is never returned from next().
280      *
281      * @see #nextToken
282      * @see #getText
283      */
284     int IGNORABLE_WHITESPACE = 7;
285 
286     /**
287      * An XML processing instruction declaration was just read. This
288      * event type is available only via <a href="#nextToken()">nextToken()</a>.
289      * getText() will return the text that is inside the processing instruction.
290      * Calls to next() will skip processing instructions automatically.
         *
291      * @see #nextToken
292      * @see #getText
293      */
294     int PROCESSING_INSTRUCTION = 8;
295 
296     /**
297      * An XML comment was just read. This token is
298      * available via <a href="#nextToken()">nextToken()</a> only;
299      * calls to next() will skip comments automatically.
300      * The content of the comment can be accessed using the getText()
301      * method.
302      *
303      * @see #nextToken
304      * @see #getText
305      */
306     int COMMENT = 9;
307 
308     /**
309      * An XML document type declaration was just read. This token is
310      * available from <a href="#nextToken()">nextToken()</a> only.
311      * The unparsed text inside the doctype is available via
312      * the getText() method.
313      *
314      * @see #nextToken
315      * @see #getText
         * @see #FEATURE_PROCESS_DOCDECL
316      */
317     int DOCDECL = 10;
318 
319     /**
320      * This array can be used to convert the event type integer constants
321      * such as START_TAG or TEXT
322      * to a string. For example, the value of TYPES[START_TAG] is
323      * the string "START_TAG".
324      *
325      * <p>This array is intended for diagnostic output only. Relying
326      * on the contents of the array may be dangerous since malicious
327      * applications may alter the array, although it is final, due
328      * to limitations of the Java language.
329      */
330     String [] TYPES = {
331         "START_DOCUMENT",
332             "END_DOCUMENT",
333             "START_TAG",
334             "END_TAG",
335             "TEXT",
336             "CDSECT",
337             "ENTITY_REF",
338             "IGNORABLE_WHITESPACE",
339             "PROCESSING_INSTRUCTION",
340             "COMMENT",
341             "DOCDECL"
342     };
343 
344 
345     // ----------------------------------------------------------------------------
346     // namespace related features
347 
348     /**
349      * This feature determines whether the parser processes
350      * namespaces. As for all features, the default value is false.
351      * <p><strong>NOTE:</strong> The value cannot be changed during
352      * parsing and must be set before parsing.
353      *
354      * @see #getFeature
355      * @see #setFeature
356      */
357     String FEATURE_PROCESS_NAMESPACES =
358         "http://xmlpull.org/v1/doc/features.html#process-namespaces";
359 
360     /**
361      * This feature determines whether namespace attributes are
362      * exposed via the attribute access methods. Like all features,
363      * the default value is false. This feature cannot be changed
364      * during parsing.
365      *
366      * @see #getFeature
367      * @see #setFeature
368      */
369     String FEATURE_REPORT_NAMESPACE_ATTRIBUTES =
            // NOTE(review): the URI fragment is "report-namespace-prefixes" while the constant
            // is named ..._ATTRIBUTES; presumably historical -- confirm before "fixing" either,
            // since this literal is the feature identifier passed to setFeature()/getFeature().
370         "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
371 
372     /**
373      * This feature determines whether the document declaration
374      * is processed. If set to false,
375      * the DOCDECL event type is reported by nextToken()
376      * and ignored by next().
377      *
378      * <p>If this feature is activated, then the document declaration
379      * must be processed by the parser.
380      *
381      * <p><strong>Please note:</strong> If the document type declaration
382      * was ignored, entity references may cause exceptions
383      * later in the parsing process.
384      * The default value of this feature is false. It cannot be changed
385      * during parsing.
386      *
387      * @see #getFeature
388      * @see #setFeature
389      */
390     String FEATURE_PROCESS_DOCDECL =
391         "http://xmlpull.org/v1/doc/features.html#process-docdecl";
392 
393     /**
394      * If this feature is activated, all validation errors as
395      * defined in the XML 1.0 specification are reported.
396      * This implies that FEATURE_PROCESS_DOCDECL is true and that both the
397      * internal and external document type declaration will be processed.
398      * <p><strong>Please Note:</strong> This feature cannot be changed
399      * during parsing. The default value is false.
400      *
401      * @see #getFeature
402      * @see #setFeature
403      */
404     String FEATURE_VALIDATION =
405         "http://xmlpull.org/v1/doc/features.html#validation";
406 
407     /**
408      * Use this call to change the general behaviour of the parser,
409      * such as namespace processing or doctype declaration handling.
410      * This method must be called before the first call to next() or
411      * nextToken(). Otherwise, an exception is thrown.
412      * <p>Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order
413      * to switch on namespace processing. The initial settings correspond
414      * to the properties requested from the XML Pull Parser factory.
415      * If none were requested, all features are deactivated by default.
416      *
         * @param name The name of the feature to be set.
         * @param state The new value of the feature (true switches it on).
417      * @exception XmlPullParserException If the feature is not supported or can not be set
418      * @exception IllegalArgumentException If string with the feature name is null
419      */
setFeature(String name, boolean state)420     void setFeature(String name,
421                            boolean state) throws XmlPullParserException;
422 
423     /**
424      * Returns the current value of the given feature.
425      * <p><strong>Please note:</strong> unknown features are
426      * <strong>always</strong> returned as false.
427      *
428      * @param name The name of the feature to be retrieved.
429      * @return The value of the feature.
430      * @exception IllegalArgumentException If string with the feature name is null
431      */
432 
getFeature(String name)433     boolean getFeature(String name);
434 
435     /**
436      * Set the value of a property.
437      *
438      * <p>The property name is any fully-qualified URI.
439      *
         * @param name The name of the property to be set.
         * @param value The new value of the property.
440      * @exception XmlPullParserException If the property is not supported or can not be set
441      * @exception IllegalArgumentException If string with the property name is null
442      */
setProperty(String name, Object value)443     void setProperty(String name,
444                             Object value) throws XmlPullParserException;
445 
446     /**
447      * Look up the value of a property.
448      *
449      * <p>The property name is any fully-qualified URI.
450      * <p><strong>NOTE:</strong> unknown properties are <strong>always</strong>
451      * returned as null.
452      *
453      * @param name The name of the property to be retrieved.
454      * @return The value of the named property.
455      */
getProperty(String name)456     Object getProperty(String name);
457 
458 
459     /**
460      * Sets the input source for the parser to the given reader and
461      * resets the parser. The event type is set to the initial value
462      * START_DOCUMENT.
463      * Setting the reader to null will just stop parsing and
464      * reset parser state,
465      * allowing the parser to free internal resources
466      * such as parsing buffers.
         *
         * @param in the reader to parse from, or null to stop parsing and reset the parser state
467      */
setInput(Reader in)468     void setInput(Reader in) throws XmlPullParserException;
469 
470 
471     /**
472      * Sets the input stream the parser is going to process.
473      * This call resets the parser state and sets the event type
474      * to the initial value START_DOCUMENT.
475      *
476      * <p><strong>NOTE:</strong> If an input encoding string is passed,
477      *  it MUST be used. Otherwise,
478      *  if inputEncoding is null, the parser SHOULD try to determine
479      *  input encoding following XML 1.0 specification (see below).
480      *  If encoding detection is supported then the following feature
481      *  <a href="http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
482      *  MUST be true; otherwise it must be false.
483      *
484      * @param inputStream contains a raw byte input stream of possibly
485      *     unknown encoding (when inputEncoding is null).
486      *
487      * @param inputEncoding if not null it MUST be used as encoding for inputStream
488      */
setInput(InputStream inputStream, String inputEncoding)489     void setInput(InputStream inputStream, String inputEncoding)
490         throws XmlPullParserException;
491 
492     /**
493      * Returns the input encoding if known, null otherwise.
494      * If setInput(InputStream, inputEncoding) was called with an inputEncoding
495      * value other than null, this value must be returned
496      * from this method. Otherwise, if inputEncoding is null and
497      * the parser supports the encoding detection feature
498      * (http://xmlpull.org/v1/doc/features.html#detect-encoding),
499      * it must return the detected encoding.
500      * If setInput(Reader) was called, null is returned.
501      * After the first call to next(), if an XML declaration was present, this method
502      * will return the encoding declared.
         *
         * @return the input encoding, or null if it is not known
503      */
getInputEncoding()504     String getInputEncoding();
505 
506     /**
507      * Set new value for entity replacement text as defined in
508      * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5
509      * Construction of Internal Entity Replacement Text</a>.
510      * If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this
511      * function will result in an exception -- when processing of DOCDECL is
512      * enabled, there is no need to define the entity replacement text manually.
513      *
514      * <p>The motivation for this function is to allow very small
515      * implementations of XMLPULL that will work in J2ME environments.
516      * Though these implementations may not be able to process the document type
517      * declaration, they still can work with known DTDs by using this function.
518      *
519      * <p><b>Please note:</b> The given value is used literally as replacement text
520      * and it corresponds to declaring entity in DTD that has all special characters
521      * escaped: left angle bracket is replaced with &amp;lt;, ampersand with &amp;amp;
522      * and so on.
523      *
524      * <p><b>Note:</b> The given value is the literal replacement text and must not
525      * contain any other entity reference (if it contains any entity reference
526      * there will be no further replacement).
527      *
528      * <p><b>Note:</b> The list of pre-defined entity names will
529      * always contain standard XML entities such as
530      * amp (&amp;amp;), lt (&amp;lt;), gt (&amp;gt;), quot (&amp;quot;), and apos (&amp;apos;).
531      * Those cannot be redefined by this method!
532      *
         * @param entityName the name of the entity whose replacement text is defined
         * @param replacementText the literal replacement text for the entity
         *
533      * @see #setInput
534      * @see #FEATURE_PROCESS_DOCDECL
535      * @see #FEATURE_VALIDATION
536      */
defineEntityReplacementText( String entityName, String replacementText )537     void defineEntityReplacementText( String entityName,
538                                             String replacementText ) throws XmlPullParserException;
539 
540     /**
541      * Returns the number of elements in the namespace stack for the given
542      * depth.
543      * If namespaces are not enabled, 0 is returned.
544      *
545      * <p><b>NOTE:</b> when parser is on END_TAG then it is allowed to call
546      *  this function with getDepth()+1 argument to retrieve position of namespace
547      *  prefixes and URIs that were declared on corresponding START_TAG.
548      * <p><b>NOTE:</b> to retrieve list of namespaces declared in current element:<pre>
549      *       XmlPullParser pp = ...
550      *       int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
551      *       int nsEnd = pp.getNamespaceCount(pp.getDepth());
552      *       for (int i = nsStart; i &lt; nsEnd; i++) {
553      *          String prefix = pp.getNamespacePrefix(i);
554      *          String ns = pp.getNamespaceUri(i);
555      *           // ...
556      *      }
557      * </pre>
558      *
         * @param depth the element depth for which the namespace stack size is requested
         * @return the number of elements in the namespace stack for the given depth,
         *     or 0 if namespaces are not enabled
         *
559      * @see #getNamespacePrefix
560      * @see #getNamespaceUri
561      * @see #getNamespace()
562      * @see #getNamespace(String)
563      */
getNamespaceCount(int depth)564     int getNamespaceCount(int depth) throws XmlPullParserException;
565 
566     /**
567      * Returns the namespace prefix for the given position
568      * in the namespace stack.
569      * Default namespace declaration (xmlns='...') will have null as prefix.
570      * If the given index is out of range, an exception is thrown.
571      * <p><b>Please note:</b> when the parser is on an END_TAG,
572      * namespace prefixes that were declared
573      * in the corresponding START_TAG are still accessible
574      * although they are no longer in scope.
         *
         * @param pos the position in the namespace stack
         * @return the namespace prefix at that position (null for a default namespace declaration)
575      */
getNamespacePrefix(int pos)576     String getNamespacePrefix(int pos) throws XmlPullParserException;
577 
578     /**
579      * Returns the namespace URI for the given position in the
580      * namespace stack.
581      * If the position is out of range, an exception is thrown.
582      * <p><b>NOTE:</b> when parser is on END_TAG then namespace prefixes that were declared
583      *  in corresponding START_TAG are still accessible even though they are not in scope.
         *
         * @param pos the position in the namespace stack
         * @return the namespace URI at that position
584      */
getNamespaceUri(int pos)585     String getNamespaceUri(int pos) throws XmlPullParserException;
586 
587     /**
588      * Returns the URI corresponding to the given prefix,
589      * depending on current state of the parser.
590      *
591      * <p>If the prefix was not declared in the current scope,
592      * null is returned. The default namespace is included
593      * in the namespace table and is available via
594      * getNamespace (null).
595      *
596      * <p>This method is a convenience method for
597      *
598      * <pre>
599      *  for (int i = getNamespaceCount(getDepth ())-1; i >= 0; i--) {
600      *   if (getNamespacePrefix(i).equals( prefix )) {
601      *     return getNamespaceUri(i);
602      *   }
603      *  }
604      *  return null;
605      * </pre>
606      *
607      * <p><strong>Please note:</strong> parser implementations
608      * may provide more efficient lookup, e.g. using a Hashtable.
609      * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as
610      * defined in the
611      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
612      * specification. Analogously, the 'xmlns' prefix is resolved to
613      * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>.
614      *
         * @param prefix the prefix to look up; null selects the default namespace
         * @return the URI bound to the prefix, or null if the prefix was not
         *     declared in the current scope
         *
615      * @see #getNamespaceCount
616      * @see #getNamespacePrefix
617      * @see #getNamespaceUri
618      */
getNamespace(String prefix)619     String getNamespace (String prefix);
620 
621 
622     // --------------------------------------------------------------------------
623     // miscellaneous reporting methods
624 
625     /**
626      * Returns the current depth of the element.
627      * Outside the root element, the depth is 0. The
628      * depth is incremented by 1 when a start tag is reached.
629      * The depth is decremented AFTER the end tag
630      * event was observed.
631      *
632      * <pre>
633      * &lt;!-- outside --&gt;     0
634      * &lt;root&gt;               1
635      *   sometext                 1
636      *     &lt;foobar&gt;         2
637      *     &lt;/foobar&gt;        2
638      * &lt;/root&gt;              1
639      * &lt;!-- outside --&gt;     0
640      * </pre>
         *
         * @return the current element depth (0 outside the root element)
641      */
getDepth()642     int getDepth();
643 
644     /**
645      * Returns a short text describing the current parser state, including
646      * the position, a
647      * description of the current event and the data source if known.
648      * This method is especially useful to provide meaningful
649      * error messages and for debugging purposes.
         *
         * @return a short description of the current parser state
650      */
getPositionDescription()651     String getPositionDescription ();
652 
653 
654     /**
655      * Returns the current line number, starting from 1.
656      * When the parser does not know the current line number
657      * or cannot determine it, -1 is returned (e.g. for WBXML).
658      *
659      * @return current line number or -1 if unknown.
660      */
getLineNumber()661     int getLineNumber();
662 
663     /**
664      * Returns the current column number, starting from 1.
665      * When the parser does not know the current column number
666      * or cannot determine it, -1 is returned (e.g. for WBXML).
667      *
668      * @return current column number or -1 if unknown.
669      */
getColumnNumber()670     int getColumnNumber();
671 
672 
673     // --------------------------------------------------------------------------
674     // TEXT related methods
675 
676     /**
677      * Checks whether the current TEXT event contains only whitespace
678      * characters.
679      * For IGNORABLE_WHITESPACE, this is always true.
680      * For TEXT and CDSECT, false is returned when the current event text
681      * contains at least one non-white space character. For any other
682      * event type an exception is thrown.
683      *
684      * <p><b>Please note:</b> non-validating parsers are not
685      * able to distinguish whitespace and ignorable whitespace,
686      * except for whitespace outside the root element. Ignorable
687      * whitespace is reported as a separate event, which is exposed
688      * via nextToken only.
689      *
         * @return true if the text of the current event consists only of whitespace characters
690      */
isWhitespace()691     boolean isWhitespace() throws XmlPullParserException;
692 
693     /**
694      * Returns the text content of the current event as String.
695      * The value returned depends on current event type,
696      * for example for TEXT event it is element content
697      * (this is typical case when next() is used).
698      *
699      * <p>See description of nextToken() for detailed description of
700      * possible returned values for different types of events.
701      *
702      * <p><strong>NOTE:</strong> in case of ENTITY_REF, this method returns
703      * the entity replacement text (or null if not available). This is
704      * the only case where
705      * getText() and getTextCharacters() return different values.
706      *
         * @return the text content of the current event; for ENTITY_REF the
         *     replacement text, or null if it is not available
         *
707      * @see #getEventType
708      * @see #next
709      * @see #nextToken
710      */
getText()711     String getText ();
712 
713 
714     /**
715      * Returns the buffer that contains the text of the current event,
716      * as well as the start offset and length relevant for the current
717      * event. See getText(), next() and nextToken() for description of possible returned values.
718      *
719      * <p><strong>Please note:</strong> this buffer must not
720      * be modified and its content MAY change after a call to
721      * next() or nextToken(). This method will always return the
722      * same value as getText(), except for ENTITY_REF. In the case
723      * of ENTITY_REF, getText() returns the replacement text and
724      * this method returns the actual input buffer containing the
725      * entity name.
726      * If getText() returns null, this method returns null as well and
727      * the values returned in the holder array MUST be -1 (both start
728      * and length).
729      *
730      * @see #getText
731      * @see #next
732      * @see #nextToken
733      *
734      * @param holderForStartAndLength Must hold a 2-element int array
735      * into which the start offset and length values will be written.
736      * @return char buffer that contains the text of the current event
737      *  (null if the current event has no text associated).
738      */
getTextCharacters(int [] holderForStartAndLength)739     char[] getTextCharacters(int [] holderForStartAndLength);
740 
741     // --------------------------------------------------------------------------
742     // START_TAG / END_TAG shared methods
743 
744     /**
745      * Returns the namespace URI of the current element.
746      * The default namespace is represented
747      * as empty string.
748      * If namespaces are not enabled, an empty String ("") is always returned.
749      * The current event must be START_TAG or END_TAG; otherwise,
750      * null is returned.
751      */
getNamespace()752     String getNamespace ();
753 
754     /**
755      * For START_TAG or END_TAG events, the (local) name of the current
756      * element is returned when namespaces are enabled. When namespace
757      * processing is disabled, the raw name is returned.
758      * For ENTITY_REF events, the entity name is returned.
759      * If the current event is not START_TAG, END_TAG, or ENTITY_REF,
760      * null is returned.
761      * <p><b>Please note:</b> To reconstruct the raw element name
762      *  when namespaces are enabled and the prefix is not null,
763      * you will need to  add the prefix and a colon to localName..
764      *
765      */
getName()766     String getName();
767 
768     /**
769      * Returns the prefix of the current element.
770      * If the element is in the default namespace (has no prefix),
771      * null is returned.
772      * If namespaces are not enabled, or the current event
773      * is not  START_TAG or END_TAG, null is returned.
774      */
getPrefix()775     String getPrefix();
776 
777     /**
778      * Returns true if the current event is START_TAG and the tag
779      * is degenerated
780      * (e.g. &lt;foobar/&gt;).
781      * <p><b>NOTE:</b> if the parser is not on START_TAG, an exception
782      * will be thrown.
783      */
isEmptyElementTag()784     boolean isEmptyElementTag() throws XmlPullParserException;
785 
786     // --------------------------------------------------------------------------
787     // START_TAG Attributes retrieval methods
788 
789     /**
790      * Returns the number of attributes of the current start tag, or
791      * -1 if the current event type is not START_TAG
792      *
793      * @see #getAttributeNamespace
794      * @see #getAttributeName
795      * @see #getAttributePrefix
796      * @see #getAttributeValue
797      */
getAttributeCount()798     int getAttributeCount();
799 
800     /**
801      * Returns the namespace URI of the attribute
802      * with the given index (starts from 0).
803      * Returns an empty string ("") if namespaces are not enabled
804      * or the attribute has no namespace.
805      * Throws an IndexOutOfBoundsException if the index is out of range
806      * or the current event type is not START_TAG.
807      *
808      * <p><strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set
809      * then namespace attributes (xmlns:ns='...') must be reported
810      * with namespace
811      * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
812      * (visit this URL for description!).
813      * The default namespace attribute (xmlns="...") will be reported with empty namespace.
814      * <p><strong>NOTE:</strong>The xml prefix is bound as defined in
815      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
816      * specification to "http://www.w3.org/XML/1998/namespace".
817      *
818      * @param index zero-based index of attribute
819      * @return attribute namespace,
820      *   empty string ("") is returned  if namespaces processing is not enabled or
821      *   namespaces processing is enabled but attribute has no namespace (it has no prefix).
822      */
getAttributeNamespace(int index)823     String getAttributeNamespace (int index);
824 
825     /**
826      * Returns the local name of the specified attribute
827      * if namespaces are enabled or just attribute name if namespaces are disabled.
828      * Throws an IndexOutOfBoundsException if the index is out of range
829      * or current event type is not START_TAG.
830      *
831      * @param index zero-based index of attribute
832      * @return attribute name (null is never returned)
833      */
getAttributeName(int index)834     String getAttributeName (int index);
835 
836     /**
837      * Returns the prefix of the specified attribute
838      * Returns null if the element has no prefix.
839      * If namespaces are disabled it will always return null.
840      * Throws an IndexOutOfBoundsException if the index is out of range
841      * or current event type is not START_TAG.
842      *
843      * @param index zero-based index of attribute
844      * @return attribute prefix or null if namespaces processing is not enabled.
845      */
getAttributePrefix(int index)846     String getAttributePrefix(int index);
847 
848     /**
849      * Returns the type of the specified attribute
850      * If parser is non-validating it MUST return CDATA.
851      *
852      * @param index zero-based index of attribute
853      * @return attribute type (null is never returned)
854      */
getAttributeType(int index)855     String getAttributeType(int index);
856 
857     /**
858      * Returns if the specified attribute was not in input was declared in XML.
859      * If parser is non-validating it MUST always return false.
860      * This information is part of XML infoset:
861      *
862      * @param index zero-based index of attribute
863      * @return false if attribute was in input
864      */
isAttributeDefault(int index)865     boolean isAttributeDefault(int index);
866 
867     /**
868      * Returns the given attributes value.
869      * Throws an IndexOutOfBoundsException if the index is out of range
870      * or current event type is not START_TAG.
871      *
872      * <p><strong>NOTE:</strong> attribute value must be normalized
873      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
874      * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
875      * 3.3.3 Attribute-Value Normalization</a>
876      *
877      * @see #defineEntityReplacementText
878      *
879      * @param index zero-based index of attribute
880      * @return value of attribute (null is never returned)
881      */
getAttributeValue(int index)882     String getAttributeValue(int index);
883 
884     /**
885      * Returns the attributes value identified by namespace URI and namespace localName.
886      * If namespaces are disabled namespace must be null.
887      * If current event type is not START_TAG then IndexOutOfBoundsException will be thrown.
888      *
889      * <p><strong>NOTE:</strong> attribute value must be normalized
890      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
891      * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
892      * 3.3.3 Attribute-Value Normalization</a>
893      *
894      * @see #defineEntityReplacementText
895      *
896      * @param namespace Namespace of the attribute if namespaces are enabled otherwise must be null
897      * @param name If namespaces enabled local name of attribute otherwise just attribute name
898      * @return value of attribute or null if attribute with given name does not exist
899      */
getAttributeValue(String namespace, String name)900     String getAttributeValue(String namespace,
901                                     String name);
902 
903     // --------------------------------------------------------------------------
904     // actual parsing methods
905 
906     /**
907      * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.)
908      *
909      * @see #next()
910      * @see #nextToken()
911      */
getEventType()912     int getEventType()
913         throws XmlPullParserException;
914 
915     /**
916      * Get next parsing event - element content will be coalesced and only one
917      * TEXT event must be returned for whole element content
918      * (comments and processing instructions will be ignored and entity references
919      * must be expanded or exception must be thrown if entity reference can not be expanded).
920      * If element content is empty (content is "") then no TEXT event will be reported.
921      *
922      * <p><b>NOTE:</b> empty element (such as &lt;tag/>) will be reported
923      *  with  two separate events: START_TAG, END_TAG - it must be so to preserve
924      *   parsing equivalency of empty element to &lt;tag>&lt;/tag>.
925      *  (see isEmptyElementTag ())
926      *
927      * @see #isEmptyElementTag
928      * @see #START_TAG
929      * @see #TEXT
930      * @see #END_TAG
931      * @see #END_DOCUMENT
932      */
933 
next()934     int next()
935         throws XmlPullParserException, IOException;
936 
937 
938     /**
939      * This method works similarly to next() but will expose
940      * additional event types (COMMENT, CDSECT, DOCDECL, ENTITY_REF, PROCESSING_INSTRUCTION, or
941      * IGNORABLE_WHITESPACE) if they are available in input.
942      *
943      * <p>If special feature
944      * <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
945      * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip)
946      * is enabled it is possible to do XML document round trip ie. reproduce
947      * exectly on output the XML input using getText():
948      * returned content is always unnormalized (exactly as in input).
949      * Otherwise returned content is end-of-line normalized as described
950      * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>
951      * and. Also when this feature is enabled exact content of START_TAG, END_TAG,
952      * DOCDECL and PROCESSING_INSTRUCTION is available.
953      *
954      * <p>Here is the list of tokens that can be  returned from nextToken()
955      * and what getText() and getTextCharacters() returns:<dl>
956      * <dt>START_DOCUMENT<dd>null
957      * <dt>END_DOCUMENT<dd>null
958      * <dt>START_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
959      *   enabled and then returns XML tag, ex: &lt;tag attr='val'>
960      * <dt>END_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
961      *  id enabled and then returns XML tag, ex: &lt;/tag>
962      * <dt>TEXT<dd>return element content.
963      *  <br>Note: that element content may be delivered in multiple consecutive TEXT events.
964      * <dt>IGNORABLE_WHITESPACE<dd>return characters that are determined to be ignorable white
965      * space. If the FEATURE_XML_ROUNDTRIP is enabled all whitespace content outside root
966      * element will always reported as IGNORABLE_WHITESPACE otherwise reporting is optional.
967      *  <br>Note: that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
968      * <dt>CDSECT<dd>
969      * return text <em>inside</em> CDATA
970      *  (ex. 'fo&lt;o' from &lt;!CDATA[fo&lt;o]]>)
971      * <dt>PROCESSING_INSTRUCTION<dd>
972      *  if FEATURE_XML_ROUNDTRIP is true
973      *  return exact PI content ex: 'pi foo' from &lt;?pi foo?>
974      *  otherwise it may be exact PI content or concatenation of PI target,
975      * space and data so for example for
976      *   &lt;?target    data?> string &quot;target data&quot; may
977      *       be returned if FEATURE_XML_ROUNDTRIP is false.
978      * <dt>COMMENT<dd>return comment content ex. 'foo bar' from &lt;!--foo bar-->
979      * <dt>ENTITY_REF<dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false
980      * otherwise getText() MAY return null,
981      * additionally getTextCharacters() MUST return entity name
982      * (for example 'entity_name' for &amp;entity_name;).
983      * <br><b>NOTE:</b> this is the only place where value returned from getText() and
984      *   getTextCharacters() <b>are different</b>
985      * <br><b>NOTE:</b> it is user responsibility to resolve entity reference
986      *    if PROCESS_DOCDECL is false and there is no entity replacement text set in
987      *    defineEntityReplacementText() method (getText() will be null)
988      * <br><b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as
989      *  &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; are reported as well
990      *  and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
991      *  This requirement is added to allow to do roundtrip of XML documents!
992      * <dt>DOCDECL<dd>
993      * if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false
994      * then return what is inside of DOCDECL for example it returns:<pre>
995      * &quot; titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
996      * [&lt;!ENTITY % active.links "INCLUDE">]&quot;</pre>
997      * <p>for input document that contained:<pre>
998      * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
999      * [&lt;!ENTITY % active.links "INCLUDE">]></pre>
1000      * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true
1001      *    then what is returned is undefined (it may be even null)
1002      * </dd>
1003      * </dl>
1004      *
1005      * <p><strong>NOTE:</strong> there is no guarantee that there will only one TEXT or
1006      * IGNORABLE_WHITESPACE event from nextToken() as parser may chose to deliver element content in
1007      * multiple tokens (dividing element content into chunks)
1008      *
1009      * <p><strong>NOTE:</strong> whether returned text of token is end-of-line normalized
1010      *  is depending on FEATURE_XML_ROUNDTRIP.
1011      *
1012      * <p><strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content
1013      * is available through optional properties (see class description above).
1014      *
1015      * @see #next
1016      * @see #START_TAG
1017      * @see #TEXT
1018      * @see #END_TAG
1019      * @see #END_DOCUMENT
1020      * @see #COMMENT
1021      * @see #DOCDECL
1022      * @see #PROCESSING_INSTRUCTION
1023      * @see #ENTITY_REF
1024      * @see #IGNORABLE_WHITESPACE
1025      */
nextToken()1026     int nextToken()
1027         throws XmlPullParserException, IOException;
1028 
    //-----------------------------------------------------------------------------
    // utility methods to make XML parsing easier ...
1031 
1032     /**
1033      * Test if the current event is of the given type and if the
1034      * namespace and name do match. null will match any namespace
1035      * and any name. If the test is not passed, an exception is
1036      * thrown. The exception text indicates the parser position,
1037      * the expected event and the current event that is not meeting the
1038      * requirement.
1039      *
1040      * <p>Essentially it does this
1041      * <pre>
1042      *  if (type != getEventType()
1043      *  || (namespace != null &amp;&amp;  !namespace.equals( getNamespace () ) )
1044      *  || (name != null &amp;&amp;  !name.equals( getName() ) ) )
1045      *     throw new XmlPullParserException( "expected "+ TYPES[ type ]+getPositionDescription());
1046      * </pre>
1047      */
require(int type, String namespace, String name)1048     void require(int type, String namespace, String name)
1049         throws XmlPullParserException, IOException;
1050 
1051     /**
1052      * If current event is START_TAG then if next element is TEXT then element content is returned
1053      * or if next event is END_TAG then empty string is returned, otherwise exception is thrown.
1054      * After calling this function successfully parser will be positioned on END_TAG.
1055      *
1056      * <p>The motivation for this function is to allow to parse consistently both
1057      * empty elements and elements that has non empty content, for example for input: <ol>
1058      * <li>&lt;tag&gt;foo&lt;/tag&gt;
1059      * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;
1060      * both input can be parsed with the same code:
1061      * <pre>
1062      *   p.nextTag()
1063      *   p.requireEvent(p.START_TAG, "", "tag");
1064      *   String content = p.nextText();
1065      *   p.requireEvent(p.END_TAG, "", "tag");
1066      * </pre>
1067      * This function together with nextTag make it very easy to parse XML that has
1068      * no mixed content.
1069      *
1070      *
1071      * <p>Essentially it does this
1072      * <pre>
1073      *  if(getEventType() != START_TAG) {
1074      *     throw new XmlPullParserException(
1075      *       "parser must be on START_TAG to read next text", this, null);
1076      *  }
1077      *  int eventType = next();
1078      *  if(eventType == TEXT) {
1079      *     String result = getText();
1080      *     eventType = next();
1081      *     if(eventType != END_TAG) {
1082      *       throw new XmlPullParserException(
1083      *          "event TEXT it must be immediately followed by END_TAG", this, null);
1084      *      }
1085      *      return result;
1086      *  } else if(eventType == END_TAG) {
1087      *     return "";
1088      *  } else {
1089      *     throw new XmlPullParserException(
1090      *       "parser must be on START_TAG or TEXT to read text", this, null);
1091      *  }
1092      * </pre>
1093      *
1094      * <p><strong>Warning:</strong> Prior to API level 14, the pull parser returned by {@code
1095      * android.util.Xml} did not always advance to the END_TAG event when this method was called.
1096      * Work around by using manually advancing after calls to nextText(): <pre>
1097      *  String text = xpp.nextText();
1098      *  if (xpp.getEventType() != XmlPullParser.END_TAG) {
1099      *      xpp.next();
1100      *  }
1101      * </pre>
1102      */
nextText()1103     String nextText() throws XmlPullParserException, IOException;
1104 
1105     /**
1106      * Call next() and return event if it is START_TAG or END_TAG
1107      * otherwise throw an exception.
1108      * It will skip whitespace TEXT before actual tag if any.
1109      *
1110      * <p>essentially it does this
1111      * <pre>
1112      *   int eventType = next();
1113      *   if(eventType == TEXT &amp;&amp;  isWhitespace()) {   // skip whitespace
1114      *      eventType = next();
1115      *   }
1116      *   if (eventType != START_TAG &amp;&amp;  eventType != END_TAG) {
1117      *      throw new XmlPullParserException("expected start or end tag", this, null);
1118      *   }
1119      *   return eventType;
1120      * </pre>
1121      */
nextTag()1122     int nextTag() throws XmlPullParserException, IOException;
1123 
1124 }
1125