1 /* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or
7  * sell copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The  above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19  * IN THE SOFTWARE. */
20 
21 // Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode)
22 
23 package org.kxml2.io;
24 
25 import java.io.Closeable;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.InputStreamReader;
29 import java.io.Reader;
30 import java.util.HashMap;
31 import java.util.Map;
32 import libcore.internal.StringPool;
33 import org.xmlpull.v1.XmlPullParser;
34 import org.xmlpull.v1.XmlPullParserException;
35 
36 /**
37  * An XML pull parser with limited support for parsing internal DTDs.
38  */
39 public class KXmlParser implements XmlPullParser, Closeable {
40 
41     private static final String PROPERTY_XMLDECL_VERSION
42             = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
43     private static final String PROPERTY_XMLDECL_STANDALONE
44             = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
45     private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location";
46     private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed";
47 
48     private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>();
49     static {
50         DEFAULT_ENTITIES.put("lt", "<");
51         DEFAULT_ENTITIES.put("gt", ">");
52         DEFAULT_ENTITIES.put("amp", "&");
53         DEFAULT_ENTITIES.put("apos", "'");
54         DEFAULT_ENTITIES.put("quot", "\"");
55     }
56 
57     private static final int ELEMENTDECL = 11;
58     private static final int ENTITYDECL = 12;
59     private static final int ATTLISTDECL = 13;
60     private static final int NOTATIONDECL = 14;
61     private static final int PARAMETER_ENTITY_REF = 15;
62     private static final char[] START_COMMENT = { '<', '!', '-', '-' };
63     private static final char[] END_COMMENT = { '-', '-', '>' };
64     private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' };
65     private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
66     private static final char[] END_CDATA = { ']', ']', '>' };
67     private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
68     private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' };
69     private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' };
70     private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' };
71     private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' };
72     private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' };
73     private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' };
74     private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' };
75     private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
76     private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' };
77     private static final char[] ANY = new char[]{ 'A', 'N', 'Y' };
78     private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' };
79     private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
80     private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' };
81     private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' };
82     private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' };
83 
84     static final private String UNEXPECTED_EOF = "Unexpected EOF";
85     static final private String ILLEGAL_TYPE = "Wrong event type";
86     static final private int XML_DECLARATION = 998;
87 
88     // general
89     private String location;
90 
91     private String version;
92     private Boolean standalone;
93     private String rootElementName;
94     private String systemId;
95     private String publicId;
96 
97     /**
98      * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines
99      * entity values and default attribute values. These values are parsed at
100      * inclusion time and may contain both tags and entity references.
101      *
102      * <p>If this is false, the user must {@link #defineEntityReplacementText
103      * define entity values manually}. Such entity values are literal strings
104      * and will not be parsed. There is no API to define default attributes
105      * manually.
106      */
107     private boolean processDocDecl;
108     private boolean processNsp;
109     private boolean relaxed;
110     private boolean keepNamespaceAttributes;
111 
112     /**
113      * If non-null, the contents of the read buffer must be copied into this
114      * string builder before the read buffer is overwritten. This is used to
115      * capture the raw DTD text while parsing the DTD.
116      */
117     private StringBuilder bufferCapture;
118 
119     /**
120      * Entities defined in or for this document. This map is created lazily.
121      */
122     private Map<String, char[]> documentEntities;
123 
124     /**
125      * Default attributes in this document. The outer map's key is the element
126      * name; the inner map's key is the attribute name. Both keys should be
127      * without namespace adjustments. This map is created lazily.
128      */
129     private Map<String, Map<String, String>> defaultAttributes;
130 
131 
132     private int depth;
133     private String[] elementStack = new String[16];
134     private String[] nspStack = new String[8];
135     private int[] nspCounts = new int[4];
136 
137     // source
138 
139     private Reader reader;
140     private String encoding;
141     private ContentSource nextContentSource;
142     private char[] buffer = new char[8192];
143     private int position = 0;
144     private int limit = 0;
145 
146     /*
147      * Track the number of newlines and columns preceding the current buffer. To
148      * compute the line and column of a position in the buffer, compute the line
149      * and column in the buffer and add the preceding values.
150      */
151     private int bufferStartLine;
152     private int bufferStartColumn;
153 
154     // the current token
155 
156     private int type;
157     private boolean isWhitespace;
158     private String namespace;
159     private String prefix;
160     private String name;
161     private String text;
162 
163     private boolean degenerated;
164     private int attributeCount;
165 
166     // true iff. we've encountered the START_TAG of an XML element at depth == 0;
167     private boolean parsedTopLevelStartTag;
168 
169     /*
170      * The current element's attributes arranged in groups of 4:
171      * i + 0 = attribute namespace URI
172      * i + 1 = attribute namespace prefix
173      * i + 2 = attribute qualified name (may contain ":", as in "html:h1")
174      * i + 3 = attribute value
175      */
176     private String[] attributes = new String[16];
177 
178     private String error;
179 
180     private boolean unresolved;
181 
182     public final StringPool stringPool = new StringPool();
183 
184     /**
185      * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"}
186      * in pulled elements. Most applications will only be interested in the effective namespaces of
187      * their elements, so these attributes aren't useful. But for structure preserving wrappers like
188      * DOM, it is necessary to keep the namespace data around.
189      */
keepNamespaceAttributes()190     public void keepNamespaceAttributes() {
191         this.keepNamespaceAttributes = true;
192     }
193 
adjustNsp()194     private boolean adjustNsp() throws XmlPullParserException {
195         boolean any = false;
196 
197         for (int i = 0; i < attributeCount << 2; i += 4) {
198             String attrName = attributes[i + 2];
199             int cut = attrName.indexOf(':');
200             String prefix;
201 
202             if (cut != -1) {
203                 prefix = attrName.substring(0, cut);
204                 attrName = attrName.substring(cut + 1);
205             } else if (attrName.equals("xmlns")) {
206                 prefix = attrName;
207                 attrName = null;
208             } else {
209                 continue;
210             }
211 
212             if (!prefix.equals("xmlns")) {
213                 any = true;
214             } else {
215                 int j = (nspCounts[depth]++) << 1;
216 
217                 nspStack = ensureCapacity(nspStack, j + 2);
218                 nspStack[j] = attrName;
219                 nspStack[j + 1] = attributes[i + 3];
220 
221                 if (attrName != null && attributes[i + 3].isEmpty()) {
222                     checkRelaxed("illegal empty namespace");
223                 }
224 
225                 if (keepNamespaceAttributes) {
226                     // explicitly set the namespace for unprefixed attributes
227                     // such as xmlns="http://foo"
228                     attributes[i] = "http://www.w3.org/2000/xmlns/";
229                     any = true;
230                 } else {
231                     System.arraycopy(
232                             attributes,
233                             i + 4,
234                             attributes,
235                             i,
236                             ((--attributeCount) << 2) - i);
237 
238                     i -= 4;
239                 }
240             }
241         }
242 
243         if (any) {
244             for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {
245 
246                 String attrName = attributes[i + 2];
247                 int cut = attrName.indexOf(':');
248 
249                 if (cut == 0 && !relaxed) {
250                     throw new RuntimeException(
251                             "illegal attribute name: " + attrName + " at " + this);
252                 } else if (cut != -1) {
253                     String attrPrefix = attrName.substring(0, cut);
254 
255                     attrName = attrName.substring(cut + 1);
256 
257                     String attrNs = getNamespace(attrPrefix);
258 
259                     if (attrNs == null && !relaxed) {
260                         throw new RuntimeException(
261                                 "Undefined Prefix: " + attrPrefix + " in " + this);
262                     }
263 
264                     attributes[i] = attrNs;
265                     attributes[i + 1] = attrPrefix;
266                     attributes[i + 2] = attrName;
267                 }
268             }
269         }
270 
271         int cut = name.indexOf(':');
272 
273         if (cut == 0) {
274             checkRelaxed("illegal tag name: " + name);
275         }
276 
277         if (cut != -1) {
278             prefix = name.substring(0, cut);
279             name = name.substring(cut + 1);
280         }
281 
282         this.namespace = getNamespace(prefix);
283 
284         if (this.namespace == null) {
285             if (prefix != null) {
286                 checkRelaxed("undefined prefix: " + prefix);
287             }
288             this.namespace = NO_NAMESPACE;
289         }
290 
291         return any;
292     }
293 
ensureCapacity(String[] arr, int required)294     private String[] ensureCapacity(String[] arr, int required) {
295         if (arr.length >= required) {
296             return arr;
297         }
298         String[] bigger = new String[required + 16];
299         System.arraycopy(arr, 0, bigger, 0, arr.length);
300         return bigger;
301     }
302 
checkRelaxed(String errorMessage)303     private void checkRelaxed(String errorMessage) throws XmlPullParserException {
304         if (!relaxed) {
305             throw new XmlPullParserException(errorMessage, this, null);
306         }
307         if (error == null) {
308             error = "Error: " + errorMessage;
309         }
310     }
311 
next()312     public int next() throws XmlPullParserException, IOException {
313         return next(false);
314     }
315 
nextToken()316     public int nextToken() throws XmlPullParserException, IOException {
317         return next(true);
318     }
319 
next(boolean justOneToken)320     private int next(boolean justOneToken) throws IOException, XmlPullParserException {
321         if (reader == null) {
322             throw new XmlPullParserException("setInput() must be called first.", this, null);
323         }
324 
325         if (type == END_TAG) {
326             depth--;
327         }
328 
329         // degenerated needs to be handled before error because of possible
330         // processor expectations(!)
331 
332         if (degenerated) {
333             degenerated = false;
334             type = END_TAG;
335             return type;
336         }
337 
338         if (error != null) {
339             if (justOneToken) {
340                 text = error;
341                 type = COMMENT;
342                 error = null;
343                 return type;
344             } else {
345                 error = null;
346             }
347         }
348 
349         type = peekType(false);
350 
351         if (type == XML_DECLARATION) {
352             readXmlDeclaration();
353             type = peekType(false);
354         }
355 
356         text = null;
357         isWhitespace = true;
358         prefix = null;
359         name = null;
360         namespace = null;
361         attributeCount = -1;
362         boolean throwOnResolveFailure = !justOneToken;
363 
364         while (true) {
365             switch (type) {
366 
367             /*
368              * Return immediately after encountering a start tag, end tag, or
369              * the end of the document.
370              */
371             case START_TAG:
372                 parseStartTag(false, throwOnResolveFailure);
373                 return type;
374             case END_TAG:
375                 readEndTag();
376                 return type;
377             case END_DOCUMENT:
378                 return type;
379 
380             /*
381              * Return after any text token when we're looking for a single
382              * token. Otherwise concatenate all text between tags.
383              */
384             case ENTITY_REF:
385                 if (justOneToken) {
386                     StringBuilder entityTextBuilder = new StringBuilder();
387                     readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT);
388                     text = entityTextBuilder.toString();
389                     break;
390                 }
391                 // fall-through
392             case TEXT:
393                 text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT);
394                 if (depth == 0 && isWhitespace) {
395                     type = IGNORABLE_WHITESPACE;
396                 }
397                 break;
398             case CDSECT:
399                 read(START_CDATA);
400                 text = readUntil(END_CDATA, true);
401                 break;
402 
403             /*
404              * Comments, processing instructions and declarations are returned
405              * when we're looking for a single token. Otherwise they're skipped.
406              */
407             case COMMENT:
408                 String commentText = readComment(justOneToken);
409                 if (justOneToken) {
410                     text = commentText;
411                 }
412                 break;
413             case PROCESSING_INSTRUCTION:
414                 read(START_PROCESSING_INSTRUCTION);
415                 String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken);
416                 if (justOneToken) {
417                     text = processingInstruction;
418                 }
419                 break;
420             case DOCDECL:
421                 readDoctype(justOneToken);
422                 if (parsedTopLevelStartTag) {
423                     throw new XmlPullParserException("Unexpected token", this, null);
424                 }
425                 break;
426 
427             default:
428                 throw new XmlPullParserException("Unexpected token", this, null);
429             }
430 
431             if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) {
432                 throw new XmlPullParserException("Unexpected token", this, null);
433             }
434 
435             if (justOneToken) {
436                 return type;
437             }
438 
439             if (type == IGNORABLE_WHITESPACE) {
440                 text = null;
441             }
442 
443             /*
444              * We've read all that we can of a non-empty text block. Always
445              * report this as text, even if it was a CDATA block or entity
446              * reference.
447              */
448             int peek = peekType(false);
449             if (text != null && !text.isEmpty() && peek < TEXT) {
450                 type = TEXT;
451                 return type;
452             }
453 
454             type = peek;
455         }
456     }
457 
458     /**
459      * Reads text until the specified delimiter is encountered. Consumes the
460      * text and the delimiter.
461      *
462      * @param returnText true to return the read text excluding the delimiter;
463      *     false to return null.
464      */
readUntil(char[] delimiter, boolean returnText)465     private String readUntil(char[] delimiter, boolean returnText)
466             throws IOException, XmlPullParserException {
467         int start = position;
468         StringBuilder result = null;
469 
470         if (returnText && text != null) {
471             result = new StringBuilder();
472             result.append(text);
473         }
474 
475         search:
476         while (true) {
477             if (position + delimiter.length > limit) {
478                 if (start < position && returnText) {
479                     if (result == null) {
480                         result = new StringBuilder();
481                     }
482                     result.append(buffer, start, position - start);
483                 }
484                 if (!fillBuffer(delimiter.length)) {
485                     checkRelaxed(UNEXPECTED_EOF);
486                     type = COMMENT;
487                     return null;
488                 }
489                 start = position;
490             }
491 
492             // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
493             // when the VM has better method inlining
494             for (int i = 0; i < delimiter.length; i++) {
495                 if (buffer[position + i] != delimiter[i]) {
496                     position++;
497                     continue search;
498                 }
499             }
500 
501             break;
502         }
503 
504         int end = position;
505         position += delimiter.length;
506 
507         if (!returnText) {
508             return null;
509         } else if (result == null) {
510             return stringPool.get(buffer, start, end - start);
511         } else {
512             result.append(buffer, start, end - start);
513             return result.toString();
514         }
515     }
516 
517     /**
518      * Returns true if an XML declaration was read.
519      */
readXmlDeclaration()520     private void readXmlDeclaration() throws IOException, XmlPullParserException {
521         if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) {
522             checkRelaxed("processing instructions must not start with xml");
523         }
524 
525         read(START_PROCESSING_INSTRUCTION);
526         parseStartTag(true, true);
527 
528         if (attributeCount < 1 || !"version".equals(attributes[2])) {
529             checkRelaxed("version expected");
530         }
531 
532         version = attributes[3];
533 
534         int pos = 1;
535 
536         if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) {
537             encoding = attributes[3 + 4];
538             pos++;
539         }
540 
541         if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) {
542             String st = attributes[3 + 4 * pos];
543             if ("yes".equals(st)) {
544                 standalone = Boolean.TRUE;
545             } else if ("no".equals(st)) {
546                 standalone = Boolean.FALSE;
547             } else {
548                 checkRelaxed("illegal standalone value: " + st);
549             }
550             pos++;
551         }
552 
553         if (pos != attributeCount) {
554             checkRelaxed("unexpected attributes in XML declaration");
555         }
556 
557         isWhitespace = true;
558         text = null;
559     }
560 
readComment(boolean returnText)561     private String readComment(boolean returnText) throws IOException, XmlPullParserException {
562         read(START_COMMENT);
563 
564         if (relaxed) {
565             return readUntil(END_COMMENT, returnText);
566         }
567 
568         String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText);
569         if (peekCharacter() != '>') {
570             throw new XmlPullParserException("Comments may not contain --", this, null);
571         }
572         position++;
573         return commentText;
574     }
575 
576     /**
577      * Read the document's DTD. Although this parser is non-validating, the DTD
578      * must be parsed to capture entity values and default attribute values.
579      */
readDoctype(boolean saveDtdText)580     private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException {
581         read(START_DOCTYPE);
582 
583         int startPosition = -1;
584         if (saveDtdText) {
585             bufferCapture = new StringBuilder();
586             startPosition = position;
587         }
588         try {
589             skip();
590             rootElementName = readName();
591             readExternalId(true, true);
592             skip();
593             if (peekCharacter() == '[') {
594                 readInternalSubset();
595             }
596             skip();
597         } finally {
598             if (saveDtdText) {
599                 bufferCapture.append(buffer, 0, position);
600                 bufferCapture.delete(0, startPosition);
601                 text = bufferCapture.toString();
602                 bufferCapture = null;
603             }
604         }
605 
606         read('>');
607         skip();
608     }
609 
610     /**
611      * Reads an external ID of one of these two forms:
612      *   SYSTEM "quoted system name"
613      *   PUBLIC "quoted public id" "quoted system name"
614      *
615      * If the system name is not required, this also supports lone public IDs of
616      * this form:
617      *   PUBLIC "quoted public id"
618      *
619      * Returns true if any ID was read.
620      */
readExternalId(boolean requireSystemName, boolean assignFields)621     private boolean readExternalId(boolean requireSystemName, boolean assignFields)
622             throws IOException, XmlPullParserException {
623         skip();
624         int c = peekCharacter();
625 
626         if (c == 'S') {
627             read(SYSTEM);
628         } else if (c == 'P') {
629             read(PUBLIC);
630             skip();
631             if (assignFields) {
632                 publicId = readQuotedId(true);
633             } else {
634                 readQuotedId(false);
635             }
636         } else {
637             return false;
638         }
639 
640         skip();
641 
642         if (!requireSystemName) {
643             int delimiter = peekCharacter();
644             if (delimiter != '"' && delimiter != '\'') {
645                 return true; // no system name!
646             }
647         }
648 
649         if (assignFields) {
650             systemId = readQuotedId(true);
651         } else {
652             readQuotedId(false);
653         }
654         return true;
655     }
656 
657     private static final char[] SINGLE_QUOTE = new char[] { '\'' };
658     private static final char[] DOUBLE_QUOTE = new char[] { '"' };
659 
660     /**
661      * Reads a quoted string, performing no entity escaping of the contents.
662      */
readQuotedId(boolean returnText)663     private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException {
664         int quote = peekCharacter();
665         char[] delimiter;
666         if (quote == '"') {
667             delimiter = DOUBLE_QUOTE;
668         } else if (quote == '\'') {
669             delimiter = SINGLE_QUOTE;
670         } else {
671             throw new XmlPullParserException("Expected a quoted string", this, null);
672         }
673         position++;
674         return readUntil(delimiter, returnText);
675     }
676 
readInternalSubset()677     private void readInternalSubset() throws IOException, XmlPullParserException {
678         read('[');
679 
680         while (true) {
681             skip();
682             if (peekCharacter() == ']') {
683                 position++;
684                 return;
685             }
686 
687             int declarationType = peekType(true);
688             switch (declarationType) {
689             case ELEMENTDECL:
690                 readElementDeclaration();
691                 break;
692 
693             case ATTLISTDECL:
694                 readAttributeListDeclaration();
695                 break;
696 
697             case ENTITYDECL:
698                 readEntityDeclaration();
699                 break;
700 
701             case NOTATIONDECL:
702                 readNotationDeclaration();
703                 break;
704 
705             case PROCESSING_INSTRUCTION:
706                 read(START_PROCESSING_INSTRUCTION);
707                 readUntil(END_PROCESSING_INSTRUCTION, false);
708                 break;
709 
710             case COMMENT:
711                 readComment(false);
712                 break;
713 
714             case PARAMETER_ENTITY_REF:
715                 throw new XmlPullParserException(
716                         "Parameter entity references are not supported", this, null);
717 
718             default:
719                 throw new XmlPullParserException("Unexpected token", this, null);
720             }
721         }
722     }
723 
724     /**
725      * Read an element declaration. This contains a name and a content spec.
726      *   <!ELEMENT foo EMPTY >
727      *   <!ELEMENT foo (bar?,(baz|quux)) >
728      *   <!ELEMENT foo (#PCDATA|bar)* >
729      */
readElementDeclaration()730     private void readElementDeclaration() throws IOException, XmlPullParserException {
731         read(START_ELEMENT);
732         skip();
733         readName();
734         readContentSpec();
735         skip();
736         read('>');
737     }
738 
739     /**
740      * Read an element content spec. This is a regular expression-like pattern
741      * of names or other content specs. The following operators are supported:
742      *   sequence:    (a,b,c)
743      *   choice:      (a|b|c)
744      *   optional:    a?
745      *   one or more: a+
746      *   any number:  a*
747      *
748      * The special name '#PCDATA' is permitted but only if it is the first
749      * element of the first group:
750      *   (#PCDATA|a|b)
751      *
752      * The top-level element must be either a choice, a sequence, or one of the
753      * special names EMPTY and ANY.
754      */
readContentSpec()755     private void readContentSpec() throws IOException, XmlPullParserException {
756         // this implementation is very lenient; it scans for balanced parens only
757         skip();
758         int c = peekCharacter();
759         if (c == '(') {
760             int depth = 0;
761             do {
762                 if (c == '(') {
763                     depth++;
764                 } else if (c == ')') {
765                     depth--;
766                 } else if (c == -1) {
767                     throw new XmlPullParserException(
768                             "Unterminated element content spec", this, null);
769                 }
770                 position++;
771                 c = peekCharacter();
772             } while (depth > 0);
773 
774             if (c == '*' || c == '?' || c == '+') {
775                 position++;
776             }
777         } else if (c == EMPTY[0]) {
778             read(EMPTY);
779         } else if (c == ANY[0]) {
780             read(ANY);
781         } else {
782             throw new XmlPullParserException("Expected element content spec", this, null);
783         }
784     }
785 
786     /**
787      * Reads an attribute list declaration such as the following:
788      *   <!ATTLIST foo
789      *       bar CDATA #IMPLIED
790      *       quux (a|b|c) "c"
791      *       baz NOTATION (a|b|c) #FIXED "c">
792      *
793      * Each attribute has a name, type and default.
794      *
795      * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY,
796      * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)"
797      * or NOTATION followed by an enumerated type.
798      *
799      * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or
800      * #FIXED with a quoted value.
801      */
readAttributeListDeclaration()802     private void readAttributeListDeclaration() throws IOException, XmlPullParserException {
803         read(START_ATTLIST);
804         skip();
805         String elementName = readName();
806 
807         while (true) {
808             skip();
809             int c = peekCharacter();
810             if (c == '>') {
811                 position++;
812                 return;
813             }
814 
815             // attribute name
816             String attributeName = readName();
817 
818             // attribute type
819             skip();
820             if (position + 1 >= limit && !fillBuffer(2)) {
821                 throw new XmlPullParserException("Malformed attribute list", this, null);
822             }
823             if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) {
824                 read(NOTATION);
825                 skip();
826             }
827             c = peekCharacter();
828             if (c == '(') {
829                 position++;
830                 while (true) {
831                     skip();
832                     readName();
833                     skip();
834                     c = peekCharacter();
835                     if (c == ')') {
836                         position++;
837                         break;
838                     } else if (c == '|') {
839                         position++;
840                     } else {
841                         throw new XmlPullParserException("Malformed attribute type", this, null);
842                     }
843                 }
844             } else {
845                 readName();
846             }
847 
848             // default value
849             skip();
850             c = peekCharacter();
851             if (c == '#') {
852                 position++;
853                 c = peekCharacter();
854                 if (c == 'R') {
855                     read(REQUIRED);
856                 } else if (c == 'I') {
857                     read(IMPLIED);
858                 } else if (c == 'F') {
859                     read(FIXED);
860                 } else {
861                     throw new XmlPullParserException("Malformed attribute type", this, null);
862                 }
863                 skip();
864                 c = peekCharacter();
865             }
866             if (c == '"' || c == '\'') {
867                 position++;
868                 // TODO: does this do escaping correctly?
869                 String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE);
870                 if (peekCharacter() == c) {
871                     position++;
872                 }
873                 defineAttributeDefault(elementName, attributeName, value);
874             }
875         }
876     }
877 
defineAttributeDefault(String elementName, String attributeName, String value)878     private void defineAttributeDefault(String elementName, String attributeName, String value) {
879         if (defaultAttributes == null) {
880             defaultAttributes = new HashMap<String, Map<String, String>>();
881         }
882         Map<String, String> elementAttributes = defaultAttributes.get(elementName);
883         if (elementAttributes == null) {
884             elementAttributes = new HashMap<String, String>();
885             defaultAttributes.put(elementName, elementAttributes);
886         }
887         elementAttributes.put(attributeName, value);
888     }
889 
890     /**
891      * Read an entity declaration. The value of internal entities are inline:
892      *   <!ENTITY foo "bar">
893      *
894      * The values of external entities must be retrieved by URL or path:
895      *   <!ENTITY foo SYSTEM "http://host/file">
896      *   <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file">
897      *   <!ENTITY foo SYSTEM "../file.png" NDATA png>
898      *
899      * Entities may be general or parameterized. Parameterized entities are
900      * marked by a percent sign. Such entities may only be used in the DTD:
901      *   <!ENTITY % foo "bar">
902      */
readEntityDeclaration()903     private void readEntityDeclaration() throws IOException, XmlPullParserException {
904         read(START_ENTITY);
905         boolean generalEntity = true;
906 
907         skip();
908         if (peekCharacter() == '%') {
909             generalEntity = false;
910             position++;
911             skip();
912         }
913 
914         String name = readName();
915 
916         skip();
917         int quote = peekCharacter();
918         String entityValue;
919         if (quote == '"' || quote == '\'') {
920             position++;
921             entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION);
922             if (peekCharacter() == quote) {
923                 position++;
924             }
925         } else if (readExternalId(true, false)) {
926             /*
927              * Map external entities to the empty string. This is dishonest,
928              * but it's consistent with Android's Expat pull parser.
929              */
930             entityValue = "";
931             skip();
932             if (peekCharacter() == NDATA[0]) {
933                 read(NDATA);
934                 skip();
935                 readName();
936             }
937         } else {
938             throw new XmlPullParserException("Expected entity value or external ID", this, null);
939         }
940 
941         if (generalEntity && processDocDecl) {
942             if (documentEntities == null) {
943                 documentEntities = new HashMap<String, char[]>();
944             }
945             documentEntities.put(name, entityValue.toCharArray());
946         }
947 
948         skip();
949         read('>');
950     }
951 
readNotationDeclaration()952     private void readNotationDeclaration() throws IOException, XmlPullParserException {
953         read(START_NOTATION);
954         skip();
955         readName();
956         if (!readExternalId(false, false)) {
957             throw new XmlPullParserException(
958                     "Expected external ID or public ID for notation", this, null);
959         }
960         skip();
961         read('>');
962     }
963 
readEndTag()964     private void readEndTag() throws IOException, XmlPullParserException {
965         read('<');
966         read('/');
967         name = readName(); // TODO: pass the expected name in as a hint?
968         skip();
969         read('>');
970 
971         int sp = (depth - 1) * 4;
972 
973         if (depth == 0) {
974             checkRelaxed("read end tag " + name + " with no tags open");
975             type = COMMENT;
976             return;
977         }
978 
979         if (name.equals(elementStack[sp + 3])) {
980             namespace = elementStack[sp];
981             prefix = elementStack[sp + 1];
982             name = elementStack[sp + 2];
983         } else if (!relaxed) {
984             throw new XmlPullParserException(
985                     "expected: /" + elementStack[sp + 3] + " read: " + name, this, null);
986         }
987     }
988 
989     /**
990      * Returns the type of the next token.
991      */
peekType(boolean inDeclaration)992     private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException {
993         if (position >= limit && !fillBuffer(1)) {
994             return END_DOCUMENT;
995         }
996 
997         switch (buffer[position]) {
998         case '&':
999             return ENTITY_REF; // &
1000         case '<':
1001             if (position + 3 >= limit && !fillBuffer(4)) {
1002                 throw new XmlPullParserException("Dangling <", this, null);
1003             }
1004 
1005             switch (buffer[position + 1]) {
1006             case '/':
1007                 return END_TAG; // </
1008             case '?':
1009                 // we're looking for "<?xml " with case insensitivity
1010                 if ((position + 5 < limit || fillBuffer(6))
1011                         && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X')
1012                         && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M')
1013                         && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L')
1014                         && (buffer[position + 5] == ' ')) {
1015                     return XML_DECLARATION; // <?xml
1016                 } else {
1017                     return PROCESSING_INSTRUCTION; // <?
1018                 }
1019             case '!':
1020                 switch (buffer[position + 2]) {
1021                 case 'D':
1022                     return DOCDECL; // <!D
1023                 case '[':
1024                     return CDSECT; // <![
1025                 case '-':
1026                     return COMMENT; // <!-
1027                 case 'E':
1028                     switch (buffer[position + 3]) {
1029                     case 'L':
1030                         return ELEMENTDECL; // <!EL
1031                     case 'N':
1032                         return ENTITYDECL; // <!EN
1033                     }
1034                     break;
1035                 case 'A':
1036                     return ATTLISTDECL;  // <!A
1037                 case 'N':
1038                     return NOTATIONDECL; // <!N
1039                 }
1040                 throw new XmlPullParserException("Unexpected <!", this, null);
1041             default:
1042                 return START_TAG; // <
1043             }
1044         case '%':
1045             return inDeclaration ? PARAMETER_ENTITY_REF : TEXT;
1046         default:
1047             return TEXT;
1048         }
1049     }
1050 
1051     /**
1052      * Sets name and attributes
1053      */
parseStartTag(boolean xmldecl, boolean throwOnResolveFailure)1054     private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure)
1055             throws IOException, XmlPullParserException {
1056         if (!xmldecl) {
1057             read('<');
1058         }
1059         name = readName();
1060         attributeCount = 0;
1061 
1062         while (true) {
1063             skip();
1064 
1065             if (position >= limit && !fillBuffer(1)) {
1066                 checkRelaxed(UNEXPECTED_EOF);
1067                 return;
1068             }
1069 
1070             int c = buffer[position];
1071 
1072             if (xmldecl) {
1073                 if (c == '?') {
1074                     position++;
1075                     read('>');
1076                     return;
1077                 }
1078             } else {
1079                 if (c == '/') {
1080                     degenerated = true;
1081                     position++;
1082                     skip();
1083                     read('>');
1084                     break;
1085                 } else if (c == '>') {
1086                     position++;
1087                     break;
1088                 }
1089             }
1090 
1091             String attrName = readName();
1092 
1093             int i = (attributeCount++) * 4;
1094             attributes = ensureCapacity(attributes, i + 4);
1095             attributes[i] = "";
1096             attributes[i + 1] = null;
1097             attributes[i + 2] = attrName;
1098 
1099             skip();
1100             if (position >= limit && !fillBuffer(1)) {
1101                 checkRelaxed(UNEXPECTED_EOF);
1102                 return;
1103             }
1104 
1105             if (buffer[position] == '=') {
1106                 position++;
1107 
1108                 skip();
1109                 if (position >= limit && !fillBuffer(1)) {
1110                     checkRelaxed(UNEXPECTED_EOF);
1111                     return;
1112                 }
1113                 char delimiter = buffer[position];
1114 
1115                 if (delimiter == '\'' || delimiter == '"') {
1116                     position++;
1117                 } else if (relaxed) {
1118                     delimiter = ' ';
1119                 } else {
1120                     throw new XmlPullParserException("attr value delimiter missing!", this, null);
1121                 }
1122 
1123                 attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure,
1124                         ValueContext.ATTRIBUTE);
1125 
1126                 if (delimiter != ' ' && peekCharacter() == delimiter) {
1127                     position++; // end quote
1128                 }
1129             } else if (relaxed) {
1130                 attributes[i + 3] = attrName;
1131             } else {
1132                 checkRelaxed("Attr.value missing f. " + attrName);
1133                 attributes[i + 3] = attrName;
1134             }
1135         }
1136 
1137         int sp = depth++ * 4;
1138         if (depth == 1) {
1139             parsedTopLevelStartTag = true;
1140         }
1141         elementStack = ensureCapacity(elementStack, sp + 4);
1142         elementStack[sp + 3] = name;
1143 
1144         if (depth >= nspCounts.length) {
1145             int[] bigger = new int[depth + 4];
1146             System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length);
1147             nspCounts = bigger;
1148         }
1149 
1150         nspCounts[depth] = nspCounts[depth - 1];
1151 
1152         if (processNsp) {
1153             adjustNsp();
1154         } else {
1155             namespace = "";
1156         }
1157 
1158         // For consistency with Expat, add default attributes after fixing namespaces.
1159         if (defaultAttributes != null) {
1160             Map<String, String> elementDefaultAttributes = defaultAttributes.get(name);
1161             if (elementDefaultAttributes != null) {
1162                 for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) {
1163                     if (getAttributeValue(null, entry.getKey()) != null) {
1164                         continue; // an explicit value overrides the default
1165                     }
1166 
1167                     int i = (attributeCount++) * 4;
1168                     attributes = ensureCapacity(attributes, i + 4);
1169                     attributes[i] = "";
1170                     attributes[i + 1] = null;
1171                     attributes[i + 2] = entry.getKey();
1172                     attributes[i + 3] = entry.getValue();
1173                 }
1174             }
1175         }
1176 
1177         elementStack[sp] = namespace;
1178         elementStack[sp + 1] = prefix;
1179         elementStack[sp + 2] = name;
1180     }
1181 
1182     /**
1183      * Reads an entity reference from the buffer, resolves it, and writes the
1184      * resolved entity to {@code out}. If the entity cannot be read or resolved,
1185      * {@code out} will contain the partial entity reference.
1186      */
readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure, ValueContext valueContext)1187     private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure,
1188             ValueContext valueContext) throws IOException, XmlPullParserException {
1189         int start = out.length();
1190 
1191         if (buffer[position++] != '&') {
1192             throw new AssertionError();
1193         }
1194 
1195         out.append('&');
1196 
1197         while (true) {
1198             int c = peekCharacter();
1199 
1200             if (c == ';') {
1201                 out.append(';');
1202                 position++;
1203                 break;
1204 
1205             } else if (c >= 128
1206                     || (c >= '0' && c <= '9')
1207                     || (c >= 'a' && c <= 'z')
1208                     || (c >= 'A' && c <= 'Z')
1209                     || c == '_'
1210                     || c == '-'
1211                     || c == '#') {
1212                 position++;
1213                 out.append((char) c);
1214 
1215             } else if (relaxed) {
1216                 // intentionally leave the partial reference in 'out'
1217                 return;
1218 
1219             } else {
1220                 throw new XmlPullParserException("unterminated entity ref", this, null);
1221             }
1222         }
1223 
1224         String code = out.substring(start + 1, out.length() - 1);
1225 
1226         if (isEntityToken) {
1227             name = code;
1228         }
1229 
1230         if (code.startsWith("#")) {
1231             try {
1232                 int c = code.startsWith("#x")
1233                         ? Integer.parseInt(code.substring(2), 16)
1234                         : Integer.parseInt(code.substring(1));
1235                 out.delete(start, out.length());
1236                 out.appendCodePoint(c);
1237                 unresolved = false;
1238                 return;
1239             } catch (NumberFormatException notANumber) {
1240                 throw new XmlPullParserException("Invalid character reference: &" + code);
1241             } catch (IllegalArgumentException invalidCodePoint) {
1242                 throw new XmlPullParserException("Invalid character reference: &" + code);
1243             }
1244         }
1245 
1246         if (valueContext == ValueContext.ENTITY_DECLARATION) {
1247             // keep the unresolved &code; in the text to resolve later
1248             return;
1249         }
1250 
1251         String defaultEntity = DEFAULT_ENTITIES.get(code);
1252         if (defaultEntity != null) {
1253             out.delete(start, out.length());
1254             unresolved = false;
1255             out.append(defaultEntity);
1256             return;
1257         }
1258 
1259         char[] resolved;
1260         if (documentEntities != null && (resolved = documentEntities.get(code)) != null) {
1261             out.delete(start, out.length());
1262             unresolved = false;
1263             if (processDocDecl) {
1264                 pushContentSource(resolved); // parse the entity as XML
1265             } else {
1266                 out.append(resolved); // include the entity value as text
1267             }
1268             return;
1269         }
1270 
1271         /*
1272          * The parser skipped an external DTD, and now we've encountered an
1273          * unknown entity that could have been declared there. Map it to the
1274          * empty string. This is dishonest, but it's consistent with Android's
1275          * old ExpatPullParser.
1276          */
1277         if (systemId != null) {
1278             out.delete(start, out.length());
1279             return;
1280         }
1281 
1282         // keep the unresolved entity "&code;" in the text for relaxed clients
1283         unresolved = true;
1284         if (throwOnResolveFailure) {
1285             checkRelaxed("unresolved: &" + code + ";");
1286         }
1287     }
1288 
1289     /**
1290      * Where a value is found impacts how that value is interpreted. For
1291      * example, in attributes, "\n" must be replaced with a space character. In
1292      * text, "]]>" is forbidden. In entity declarations, named references are
1293      * not resolved.
1294      */
1295     enum ValueContext {
1296         ATTRIBUTE,
1297         TEXT,
1298         ENTITY_DECLARATION
1299     }
1300 
1301     /**
1302      * Returns the current text or attribute value. This also has the side
1303      * effect of setting isWhitespace to false if a non-whitespace character is
1304      * encountered.
1305      *
1306      * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted
1307      *     attributes, or a space for unquoted attributes.
1308      */
readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure, ValueContext valueContext)1309     private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure,
1310             ValueContext valueContext) throws IOException, XmlPullParserException {
1311 
1312         /*
1313          * This method returns all of the characters from the current position
1314          * through to an appropriate delimiter.
1315          *
1316          * If we're lucky (which we usually are), we'll return a single slice of
1317          * the buffer. This fast path avoids allocating a string builder.
1318          *
1319          * There are 6 unlucky characters we could encounter:
1320          *  - "&":  entities must be resolved.
1321          *  - "%":  parameter entities are unsupported in entity values.
1322          *  - "<":  this isn't permitted in attributes unless relaxed.
1323          *  - "]":  this requires a lookahead to defend against the forbidden
1324          *          CDATA section delimiter "]]>".
1325          *  - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it
1326          *          isn't followed by "\n", we replace "\r" with either a "\n"
1327          *          in text nodes or a space in attribute values.
1328          *  - "\n": In attribute values, "\n" must be replaced with a space.
1329          *
1330          * We could also get unlucky by needing to refill the buffer midway
1331          * through the text.
1332          */
1333 
1334         int start = position;
1335         StringBuilder result = null;
1336 
1337         // if a text section was already started, prefix the start
1338         if (valueContext == ValueContext.TEXT && text != null) {
1339             result = new StringBuilder();
1340             result.append(text);
1341         }
1342 
1343         while (true) {
1344 
1345             /*
1346              * Make sure we have at least a single character to read from the
1347              * buffer. This mutates the buffer, so save the partial result
1348              * to the slow path string builder first.
1349              */
1350             if (position >= limit) {
1351                 if (start < position) {
1352                     if (result == null) {
1353                         result = new StringBuilder();
1354                     }
1355                     result.append(buffer, start, position - start);
1356                 }
1357                 if (!fillBuffer(1)) {
1358                     return result != null ? result.toString() : "";
1359                 }
1360                 start = position;
1361             }
1362 
1363             char c = buffer[position];
1364 
1365             if (c == delimiter
1366                     || (delimiter == ' ' && (c <= ' ' || c == '>'))
1367                     || c == '&' && !resolveEntities) {
1368                 break;
1369             }
1370 
1371             if (c != '\r'
1372                     && (c != '\n' || valueContext != ValueContext.ATTRIBUTE)
1373                     && c != '&'
1374                     && c != '<'
1375                     && (c != ']' || valueContext != ValueContext.TEXT)
1376                     && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) {
1377                 isWhitespace &= (c <= ' ');
1378                 position++;
1379                 continue;
1380             }
1381 
1382             /*
1383              * We've encountered an unlucky character! Convert from fast
1384              * path to slow path if we haven't done so already.
1385              */
1386             if (result == null) {
1387                 result = new StringBuilder();
1388             }
1389             result.append(buffer, start, position - start);
1390 
1391             if (c == '\r') {
1392                 if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') {
1393                     position++;
1394                 }
1395                 c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n';
1396 
1397             } else if (c == '\n') {
1398                 c = ' ';
1399 
1400             } else if (c == '&') {
1401                 isWhitespace = false; // TODO: what if the entity resolves to whitespace?
1402                 readEntity(result, false, throwOnResolveFailure, valueContext);
1403                 start = position;
1404                 continue;
1405 
1406             } else if (c == '<') {
1407                 if (valueContext == ValueContext.ATTRIBUTE) {
1408                     checkRelaxed("Illegal: \"<\" inside attribute value");
1409                 }
1410                 isWhitespace = false;
1411 
1412             } else if (c == ']') {
1413                 if ((position + 2 < limit || fillBuffer(3))
1414                         && buffer[position + 1] == ']' && buffer[position + 2] == '>') {
1415                     checkRelaxed("Illegal: \"]]>\" outside CDATA section");
1416                 }
1417                 isWhitespace = false;
1418 
1419             } else if (c == '%') {
1420                 throw new XmlPullParserException("This parser doesn't support parameter entities",
1421                         this, null);
1422 
1423             } else {
1424                 throw new AssertionError();
1425             }
1426 
1427             position++;
1428             result.append(c);
1429             start = position;
1430         }
1431 
1432         if (result == null) {
1433             return stringPool.get(buffer, start, position - start);
1434         } else {
1435             result.append(buffer, start, position - start);
1436             return result.toString();
1437         }
1438     }
1439 
read(char expected)1440     private void read(char expected) throws IOException, XmlPullParserException {
1441         int c = peekCharacter();
1442         if (c != expected) {
1443             checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'");
1444             if (c == -1) {
1445                 return; // On EOF, don't move position beyond limit
1446             }
1447         }
1448         position++;
1449     }
1450 
read(char[] chars)1451     private void read(char[] chars) throws IOException, XmlPullParserException {
1452         if (position + chars.length > limit && !fillBuffer(chars.length)) {
1453             checkRelaxed("expected: '" + new String(chars) + "' but was EOF");
1454             return;
1455         }
1456 
1457         // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
1458         // when the VM has better method inlining
1459         for (int i = 0; i < chars.length; i++) {
1460             if (buffer[position + i] != chars[i]) {
1461                 checkRelaxed("expected: \"" + new String(chars) + "\" but was \""
1462                         + new String(buffer, position, chars.length) + "...\"");
1463             }
1464         }
1465 
1466         position += chars.length;
1467     }
1468 
peekCharacter()1469     private int peekCharacter() throws IOException, XmlPullParserException {
1470         if (position < limit || fillBuffer(1)) {
1471             return buffer[position];
1472         }
1473         return -1;
1474     }
1475 
1476     /**
1477      * Returns true once {@code limit - position >= minimum}. If the data is
1478      * exhausted before that many characters are available, this returns
1479      * false.
1480      */
fillBuffer(int minimum)1481     private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException {
1482         // If we've exhausted the current content source, remove it
1483         while (nextContentSource != null) {
1484             if (position < limit) {
1485                 throw new XmlPullParserException("Unbalanced entity!", this, null);
1486             }
1487             popContentSource();
1488             if (limit - position >= minimum) {
1489                 return true;
1490             }
1491         }
1492 
1493         // Before clobbering the old characters, update where buffer starts
1494         for (int i = 0; i < position; i++) {
1495             if (buffer[i] == '\n') {
1496                 bufferStartLine++;
1497                 bufferStartColumn = 0;
1498             } else {
1499                 bufferStartColumn++;
1500             }
1501         }
1502 
1503         if (bufferCapture != null) {
1504             bufferCapture.append(buffer, 0, position);
1505         }
1506 
1507         if (limit != position) {
1508             limit -= position;
1509             System.arraycopy(buffer, position, buffer, 0, limit);
1510         } else {
1511             limit = 0;
1512         }
1513 
1514         position = 0;
1515         int total;
1516         while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) {
1517             limit += total;
1518             if (limit >= minimum) {
1519                 return true;
1520             }
1521         }
1522         return false;
1523     }
1524 
1525     /**
1526      * Returns an element or attribute name. This is always non-empty for
1527      * non-relaxed parsers.
1528      */
readName()1529     private String readName() throws IOException, XmlPullParserException {
1530         if (position >= limit && !fillBuffer(1)) {
1531             checkRelaxed("name expected");
1532             return "";
1533         }
1534 
1535         int start = position;
1536         StringBuilder result = null;
1537 
1538         // read the first character
1539         char c = buffer[position];
1540         if ((c >= 'a' && c <= 'z')
1541                 || (c >= 'A' && c <= 'Z')
1542                 || c == '_'
1543                 || c == ':'
1544                 || c >= '\u00c0' // TODO: check the XML spec
1545                 || relaxed) {
1546             position++;
1547         } else {
1548             checkRelaxed("name expected");
1549             return "";
1550         }
1551 
1552         while (true) {
1553             /*
1554              * Make sure we have at least a single character to read from the
1555              * buffer. This mutates the buffer, so save the partial result
1556              * to the slow path string builder first.
1557              */
1558             if (position >= limit) {
1559                 if (result == null) {
1560                     result = new StringBuilder();
1561                 }
1562                 result.append(buffer, start, position - start);
1563                 if (!fillBuffer(1)) {
1564                     return result.toString();
1565                 }
1566                 start = position;
1567             }
1568 
1569             // read another character
1570             c = buffer[position];
1571             if ((c >= 'a' && c <= 'z')
1572                     || (c >= 'A' && c <= 'Z')
1573                     || (c >= '0' && c <= '9')
1574                     || c == '_'
1575                     || c == '-'
1576                     || c == ':'
1577                     || c == '.'
1578                     || c >= '\u00b7') {  // TODO: check the XML spec
1579                 position++;
1580                 continue;
1581             }
1582 
1583             // we encountered a non-name character. done!
1584             if (result == null) {
1585                 return stringPool.get(buffer, start, position - start);
1586             } else {
1587                 result.append(buffer, start, position - start);
1588                 return result.toString();
1589             }
1590         }
1591     }
1592 
skip()1593     private void skip() throws IOException, XmlPullParserException {
1594         while (position < limit || fillBuffer(1)) {
1595             int c = buffer[position];
1596             if (c > ' ') {
1597                 break;
1598             }
1599             position++;
1600         }
1601     }
1602 
1603     //  public part starts here...
1604 
setInput(Reader reader)1605     public void setInput(Reader reader) throws XmlPullParserException {
1606         this.reader = reader;
1607 
1608         type = START_DOCUMENT;
1609         parsedTopLevelStartTag = false;
1610         name = null;
1611         namespace = null;
1612         degenerated = false;
1613         attributeCount = -1;
1614         encoding = null;
1615         version = null;
1616         standalone = null;
1617 
1618         if (reader == null) {
1619             return;
1620         }
1621 
1622         position = 0;
1623         limit = 0;
1624         bufferStartLine = 0;
1625         bufferStartColumn = 0;
1626         depth = 0;
1627         documentEntities = null;
1628     }
1629 
setInput(InputStream is, String charset)1630     public void setInput(InputStream is, String charset) throws XmlPullParserException {
1631         position = 0;
1632         limit = 0;
1633         boolean detectCharset = (charset == null);
1634 
1635         if (is == null) {
1636             throw new IllegalArgumentException("is == null");
1637         }
1638 
1639         try {
1640             if (detectCharset) {
1641                 // read the four bytes looking for an indication of the encoding in use
1642                 int firstFourBytes = 0;
1643                 while (limit < 4) {
1644                     int i = is.read();
1645                     if (i == -1) {
1646                         break;
1647                     }
1648                     firstFourBytes = (firstFourBytes << 8) | i;
1649                     buffer[limit++] = (char) i;
1650                 }
1651 
1652                 if (limit == 4) {
1653                     switch (firstFourBytes) {
1654                     case 0x00000FEFF: // UTF-32BE BOM
1655                         charset = "UTF-32BE";
1656                         limit = 0;
1657                         break;
1658 
1659                     case 0x0FFFE0000: // UTF-32LE BOM
1660                         charset = "UTF-32LE";
1661                         limit = 0;
1662                         break;
1663 
1664                     case 0x0000003c: // '<' in UTF-32BE
1665                         charset = "UTF-32BE";
1666                         buffer[0] = '<';
1667                         limit = 1;
1668                         break;
1669 
1670                     case 0x03c000000: // '<' in UTF-32LE
1671                         charset = "UTF-32LE";
1672                         buffer[0] = '<';
1673                         limit = 1;
1674                         break;
1675 
1676                     case 0x0003c003f: // "<?" in UTF-16BE
1677                         charset = "UTF-16BE";
1678                         buffer[0] = '<';
1679                         buffer[1] = '?';
1680                         limit = 2;
1681                         break;
1682 
1683                     case 0x03c003f00: // "<?" in UTF-16LE
1684                         charset = "UTF-16LE";
1685                         buffer[0] = '<';
1686                         buffer[1] = '?';
1687                         limit = 2;
1688                         break;
1689 
1690                     case 0x03c3f786d: // "<?xm" in ASCII etc.
1691                         while (true) {
1692                             int i = is.read();
1693                             if (i == -1) {
1694                                 break;
1695                             }
1696                             buffer[limit++] = (char) i;
1697                             if (i == '>') {
1698                                 String s = new String(buffer, 0, limit);
1699                                 int i0 = s.indexOf("encoding");
1700                                 if (i0 != -1) {
1701                                     while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') {
1702                                         i0++;
1703                                     }
1704                                     char deli = s.charAt(i0++);
1705                                     int i1 = s.indexOf(deli, i0);
1706                                     charset = s.substring(i0, i1);
1707                                 }
1708                                 break;
1709                             }
1710                         }
1711                         break;
1712 
1713                     default:
1714                         // handle a byte order mark followed by something other than <?
1715                         if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) {
1716                             charset = "UTF-16BE";
1717                             buffer[0] = (char) ((buffer[2] << 8) | buffer[3]);
1718                             limit = 1;
1719                         } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) {
1720                             charset = "UTF-16LE";
1721                             buffer[0] = (char) ((buffer[3] << 8) | buffer[2]);
1722                             limit = 1;
1723                         } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) {
1724                             charset = "UTF-8";
1725                             buffer[0] = buffer[3];
1726                             limit = 1;
1727                         }
1728                     }
1729                 }
1730             }
1731 
1732             if (charset == null) {
1733                 charset = "UTF-8";
1734             }
1735 
1736             int savedLimit = limit;
1737             setInput(new InputStreamReader(is, charset));
1738             encoding = charset;
1739             limit = savedLimit;
1740 
1741             /*
1742              * Skip the optional BOM if we didn't above. This decrements limit
1743              * rather than incrementing position so that <?xml version='1.0'?>
1744              * is still at character 0.
1745              */
1746             if (!detectCharset && peekCharacter() == 0xfeff) {
1747                 limit--;
1748                 System.arraycopy(buffer, 1, buffer, 0, limit);
1749             }
1750         } catch (Exception e) {
1751             throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e);
1752         }
1753     }
1754 
close()1755     public void close() throws IOException {
1756         if (reader != null) {
1757             reader.close();
1758         }
1759     }
1760 
getFeature(String feature)1761     public boolean getFeature(String feature) {
1762         if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
1763             return processNsp;
1764         } else if (FEATURE_RELAXED.equals(feature)) {
1765             return relaxed;
1766         } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) {
1767             return processDocDecl;
1768         } else {
1769             return false;
1770         }
1771     }
1772 
getInputEncoding()1773     public String getInputEncoding() {
1774         return encoding;
1775     }
1776 
defineEntityReplacementText(String entity, String value)1777     public void defineEntityReplacementText(String entity, String value)
1778             throws XmlPullParserException {
1779         if (processDocDecl) {
1780             throw new IllegalStateException(
1781                     "Entity replacement text may not be defined with DOCTYPE processing enabled.");
1782         }
1783         if (reader == null) {
1784             throw new IllegalStateException(
1785                     "Entity replacement text must be defined after setInput()");
1786         }
1787         if (documentEntities == null) {
1788             documentEntities = new HashMap<String, char[]>();
1789         }
1790         documentEntities.put(entity, value.toCharArray());
1791     }
1792 
getProperty(String property)1793     public Object getProperty(String property) {
1794         if (property.equals(PROPERTY_XMLDECL_VERSION)) {
1795             return version;
1796         } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) {
1797             return standalone;
1798         } else if (property.equals(PROPERTY_LOCATION)) {
1799             return location != null ? location : reader.toString();
1800         } else {
1801             return null;
1802         }
1803     }
1804 
1805     /**
1806      * Returns the root element's name if it was declared in the DTD. This
1807      * equals the first tag's name for valid documents.
1808      */
getRootElementName()1809     public String getRootElementName() {
1810         return rootElementName;
1811     }
1812 
1813     /**
1814      * Returns the document's system ID if it was declared. This is typically a
1815      * string like {@code http://www.w3.org/TR/html4/strict.dtd}.
1816      */
getSystemId()1817     public String getSystemId() {
1818         return systemId;
1819     }
1820 
1821     /**
1822      * Returns the document's public ID if it was declared. This is typically a
1823      * string like {@code -//W3C//DTD HTML 4.01//EN}.
1824      */
getPublicId()1825     public String getPublicId() {
1826         return publicId;
1827     }
1828 
getNamespaceCount(int depth)1829     public int getNamespaceCount(int depth) {
1830         if (depth > this.depth) {
1831             throw new IndexOutOfBoundsException();
1832         }
1833         return nspCounts[depth];
1834     }
1835 
getNamespacePrefix(int pos)1836     public String getNamespacePrefix(int pos) {
1837         return nspStack[pos * 2];
1838     }
1839 
getNamespaceUri(int pos)1840     public String getNamespaceUri(int pos) {
1841         return nspStack[(pos * 2) + 1];
1842     }
1843 
getNamespace(String prefix)1844     public String getNamespace(String prefix) {
1845         if ("xml".equals(prefix)) {
1846             return "http://www.w3.org/XML/1998/namespace";
1847         }
1848         if ("xmlns".equals(prefix)) {
1849             return "http://www.w3.org/2000/xmlns/";
1850         }
1851 
1852         for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) {
1853             if (prefix == null) {
1854                 if (nspStack[i] == null) {
1855                     return nspStack[i + 1];
1856                 }
1857             } else if (prefix.equals(nspStack[i])) {
1858                 return nspStack[i + 1];
1859             }
1860         }
1861         return null;
1862     }
1863 
getDepth()1864     public int getDepth() {
1865         return depth;
1866     }
1867 
getPositionDescription()1868     public String getPositionDescription() {
1869         StringBuilder buf = new StringBuilder(type < TYPES.length ? TYPES[type] : "unknown");
1870         buf.append(' ');
1871 
1872         if (type == START_TAG || type == END_TAG) {
1873             if (degenerated) {
1874                 buf.append("(empty) ");
1875             }
1876             buf.append('<');
1877             if (type == END_TAG) {
1878                 buf.append('/');
1879             }
1880 
1881             if (prefix != null) {
1882                 buf.append("{" + namespace + "}" + prefix + ":");
1883             }
1884             buf.append(name);
1885 
1886             int cnt = attributeCount * 4;
1887             for (int i = 0; i < cnt; i += 4) {
1888                 buf.append(' ');
1889                 if (attributes[i + 1] != null) {
1890                     buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":");
1891                 }
1892                 buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'");
1893             }
1894 
1895             buf.append('>');
1896         } else if (type == IGNORABLE_WHITESPACE) {
1897             ;
1898         } else if (type != TEXT) {
1899             buf.append(getText());
1900         } else if (isWhitespace) {
1901             buf.append("(whitespace)");
1902         } else {
1903             String text = getText();
1904             if (text.length() > 16) {
1905                 text = text.substring(0, 16) + "...";
1906             }
1907             buf.append(text);
1908         }
1909 
1910         buf.append("@" + getLineNumber() + ":" + getColumnNumber());
1911         if (location != null) {
1912             buf.append(" in ");
1913             buf.append(location);
1914         } else if (reader != null) {
1915             buf.append(" in ");
1916             buf.append(reader.toString());
1917         }
1918         return buf.toString();
1919     }
1920 
getLineNumber()1921     public int getLineNumber() {
1922         int result = bufferStartLine;
1923         for (int i = 0; i < position; i++) {
1924             if (buffer[i] == '\n') {
1925                 result++;
1926             }
1927         }
1928         return result + 1; // the first line is '1'
1929     }
1930 
getColumnNumber()1931     public int getColumnNumber() {
1932         int result = bufferStartColumn;
1933         for (int i = 0; i < position; i++) {
1934             if (buffer[i] == '\n') {
1935                 result = 0;
1936             } else {
1937                 result++;
1938             }
1939         }
1940         return result + 1; // the first column is '1'
1941     }
1942 
isWhitespace()1943     public boolean isWhitespace() throws XmlPullParserException {
1944         if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) {
1945             throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1946         }
1947         return isWhitespace;
1948     }
1949 
getText()1950     public String getText() {
1951         if (type < TEXT || (type == ENTITY_REF && unresolved)) {
1952             return null;
1953         } else if (text == null) {
1954             return "";
1955         } else {
1956             return text;
1957         }
1958     }
1959 
getTextCharacters(int[] poslen)1960     public char[] getTextCharacters(int[] poslen) {
1961         String text = getText();
1962         if (text == null) {
1963             poslen[0] = -1;
1964             poslen[1] = -1;
1965             return null;
1966         }
1967         char[] result = text.toCharArray();
1968         poslen[0] = 0;
1969         poslen[1] = result.length;
1970         return result;
1971     }
1972 
getNamespace()1973     public String getNamespace() {
1974         return namespace;
1975     }
1976 
getName()1977     public String getName() {
1978         return name;
1979     }
1980 
getPrefix()1981     public String getPrefix() {
1982         return prefix;
1983     }
1984 
isEmptyElementTag()1985     public boolean isEmptyElementTag() throws XmlPullParserException {
1986         if (type != START_TAG) {
1987             throw new XmlPullParserException(ILLEGAL_TYPE, this, null);
1988         }
1989         return degenerated;
1990     }
1991 
getAttributeCount()1992     public int getAttributeCount() {
1993         return attributeCount;
1994     }
1995 
getAttributeType(int index)1996     public String getAttributeType(int index) {
1997         return "CDATA";
1998     }
1999 
isAttributeDefault(int index)2000     public boolean isAttributeDefault(int index) {
2001         return false;
2002     }
2003 
getAttributeNamespace(int index)2004     public String getAttributeNamespace(int index) {
2005         if (index >= attributeCount) {
2006             throw new IndexOutOfBoundsException();
2007         }
2008         return attributes[index * 4];
2009     }
2010 
getAttributeName(int index)2011     public String getAttributeName(int index) {
2012         if (index >= attributeCount) {
2013             throw new IndexOutOfBoundsException();
2014         }
2015         return attributes[(index * 4) + 2];
2016     }
2017 
getAttributePrefix(int index)2018     public String getAttributePrefix(int index) {
2019         if (index >= attributeCount) {
2020             throw new IndexOutOfBoundsException();
2021         }
2022         return attributes[(index * 4) + 1];
2023     }
2024 
getAttributeValue(int index)2025     public String getAttributeValue(int index) {
2026         if (index >= attributeCount) {
2027             throw new IndexOutOfBoundsException();
2028         }
2029         return attributes[(index * 4) + 3];
2030     }
2031 
getAttributeValue(String namespace, String name)2032     public String getAttributeValue(String namespace, String name) {
2033         for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) {
2034             if (attributes[i + 2].equals(name)
2035                     && (namespace == null || attributes[i].equals(namespace))) {
2036                 return attributes[i + 3];
2037             }
2038         }
2039 
2040         return null;
2041     }
2042 
getEventType()2043     public int getEventType() throws XmlPullParserException {
2044         return type;
2045     }
2046 
2047     // utility methods to make XML parsing easier ...
2048 
nextTag()2049     public int nextTag() throws XmlPullParserException, IOException {
2050         next();
2051         if (type == TEXT && isWhitespace) {
2052             next();
2053         }
2054 
2055         if (type != END_TAG && type != START_TAG) {
2056             throw new XmlPullParserException("unexpected type", this, null);
2057         }
2058 
2059         return type;
2060     }
2061 
require(int type, String namespace, String name)2062     public void require(int type, String namespace, String name)
2063             throws XmlPullParserException, IOException {
2064         if (type != this.type
2065                 || (namespace != null && !namespace.equals(getNamespace()))
2066                 || (name != null && !name.equals(getName()))) {
2067             throw new XmlPullParserException(
2068                     "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null);
2069         }
2070     }
2071 
nextText()2072     public String nextText() throws XmlPullParserException, IOException {
2073         if (type != START_TAG) {
2074             throw new XmlPullParserException("precondition: START_TAG", this, null);
2075         }
2076 
2077         next();
2078 
2079         String result;
2080         if (type == TEXT) {
2081             result = getText();
2082             next();
2083         } else {
2084             result = "";
2085         }
2086 
2087         if (type != END_TAG) {
2088             throw new XmlPullParserException("END_TAG expected", this, null);
2089         }
2090 
2091         return result;
2092     }
2093 
setFeature(String feature, boolean value)2094     public void setFeature(String feature, boolean value) throws XmlPullParserException {
2095         if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) {
2096             processNsp = value;
2097         } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) {
2098             processDocDecl = value;
2099         } else if (FEATURE_RELAXED.equals(feature)) {
2100             relaxed = value;
2101         } else {
2102             throw new XmlPullParserException("unsupported feature: " + feature, this, null);
2103         }
2104     }
2105 
setProperty(String property, Object value)2106     public void setProperty(String property, Object value) throws XmlPullParserException {
2107         if (property.equals(PROPERTY_LOCATION)) {
2108             location = String.valueOf(value);
2109         } else {
2110             throw new XmlPullParserException("unsupported property: " + property);
2111         }
2112     }
2113 
2114     /**
2115      * A chain of buffers containing XML content. Each content source contains
2116      * the parser's primary read buffer or the characters of entities actively
2117      * being parsed.
2118      *
2119      * <p>For example, note the buffers needed to parse this document:
2120      * <pre>   {@code
2121      *   <!DOCTYPE foo [
2122      *       <!ENTITY baz "ghi">
2123      *       <!ENTITY bar "def &baz; jkl">
2124      *   ]>
2125      *   <foo>abc &bar; mno</foo>
2126      * }</pre>
2127      *
2128      * <p>Things get interesting when the bar entity is encountered. At that
2129      * point two buffers are active:
2130      * <ol>
2131      * <li>The value for the bar entity, containing {@code "def &baz; jkl"}
2132      * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2133      * </ol>
2134      * <p>The parser will return the characters {@code "def "} from the bar
2135      * entity's buffer, and then it will encounter the baz entity. To handle
2136      * that, three buffers will be active:
2137      * <ol>
2138      * <li>The value for the baz entity, containing {@code "ghi"}
2139      * <li>The remaining value for the bar entity, containing {@code " jkl"}
2140      * <li>The parser's primary read buffer, containing {@code " mno</foo>"}
2141      * </ol>
2142      * <p>The parser will then return the characters {@code ghi jkl mno} in that
2143      * sequence by reading each buffer in sequence.
2144      */
2145     static class ContentSource {
2146         private final ContentSource next;
2147         private final char[] buffer;
2148         private final int position;
2149         private final int limit;
ContentSource(ContentSource next, char[] buffer, int position, int limit)2150         ContentSource(ContentSource next, char[] buffer, int position, int limit) {
2151             this.next = next;
2152             this.buffer = buffer;
2153             this.position = position;
2154             this.limit = limit;
2155         }
2156     }
2157 
2158     /**
2159      * Prepends the characters of {@code newBuffer} to be read before the
2160      * current buffer.
2161      */
pushContentSource(char[] newBuffer)2162     private void pushContentSource(char[] newBuffer) {
2163         nextContentSource = new ContentSource(nextContentSource, buffer, position, limit);
2164         buffer = newBuffer;
2165         position = 0;
2166         limit = newBuffer.length;
2167     }
2168 
2169     /**
2170      * Replaces the current exhausted buffer with the next buffer in the chain.
2171      */
popContentSource()2172     private void popContentSource() {
2173         buffer = nextContentSource.buffer;
2174         position = nextContentSource.position;
2175         limit = nextContentSource.limit;
2176         nextContentSource = nextContentSource.next;
2177     }
2178 }
2179