1 package com.google.polo.json;
2 
3 /*
4 Copyright (c) 2002 JSON.org
5 
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all
14 copies or substantial portions of the Software.
15 
16 The Software shall be used for Good, not Evil.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 SOFTWARE.
25 */
26 
27 /**
28  * The XMLTokener extends the JSONTokener to provide additional methods
29  * for the parsing of XML texts.
30  * @author JSON.org
31  * @version 2008-09-18
32  */
33 public class XMLTokener extends JSONTokener {
34 
35 
36    /** The table of entity values. It initially contains Character values for
37     * amp, apos, gt, lt, quot.
38     */
39    public static final java.util.HashMap entity;
40 
41    static {
42        entity = new java.util.HashMap(8);
43        entity.put("amp",  XML.AMP);
44        entity.put("apos", XML.APOS);
45        entity.put("gt",   XML.GT);
46        entity.put("lt",   XML.LT);
47        entity.put("quot", XML.QUOT);
48    }
49 
50     /**
51      * Construct an XMLTokener from a string.
52      * @param s A source string.
53      */
XMLTokener(String s)54     public XMLTokener(String s) {
55         super(s);
56     }
57 
58     /**
59      * Get the text in the CDATA block.
60      * @return The string up to the <code>]]&gt;</code>.
61      * @throws JSONException If the <code>]]&gt;</code> is not found.
62      */
nextCDATA()63     public String nextCDATA() throws JSONException {
64         char         c;
65         int          i;
66         StringBuffer sb = new StringBuffer();
67         for (;;) {
68             c = next();
69             if (c == 0) {
70                 throw syntaxError("Unclosed CDATA");
71             }
72             sb.append(c);
73             i = sb.length() - 3;
74             if (i >= 0 && sb.charAt(i) == ']' &&
75                           sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') {
76                 sb.setLength(i);
77                 return sb.toString();
78             }
79         }
80     }
81 
82 
83     /**
84      * Get the next XML outer token, trimming whitespace. There are two kinds
85      * of tokens: the '<' character which begins a markup tag, and the content
86      * text between markup tags.
87      *
88      * @return  A string, or a '<' Character, or null if there is no more
89      * source text.
90      * @throws JSONException
91      */
nextContent()92     public Object nextContent() throws JSONException {
93         char         c;
94         StringBuffer sb;
95         do {
96             c = next();
97         } while (Character.isWhitespace(c));
98         if (c == 0) {
99             return null;
100         }
101         if (c == '<') {
102             return XML.LT;
103         }
104         sb = new StringBuffer();
105         for (;;) {
106             if (c == '<' || c == 0) {
107                 back();
108                 return sb.toString().trim();
109             }
110             if (c == '&') {
111                 sb.append(nextEntity(c));
112             } else {
113                 sb.append(c);
114             }
115             c = next();
116         }
117     }
118 
119 
120     /**
121      * Return the next entity. These entities are translated to Characters:
122      *     <code>&amp;  &apos;  &gt;  &lt;  &quot;</code>.
123      * @param a An ampersand character.
124      * @return  A Character or an entity String if the entity is not recognized.
125      * @throws JSONException If missing ';' in XML entity.
126      */
nextEntity(char a)127     public Object nextEntity(char a) throws JSONException {
128         StringBuffer sb = new StringBuffer();
129         for (;;) {
130             char c = next();
131             if (Character.isLetterOrDigit(c) || c == '#') {
132                 sb.append(Character.toLowerCase(c));
133             } else if (c == ';') {
134                 break;
135             } else {
136                 throw syntaxError("Missing ';' in XML entity: &" + sb);
137             }
138         }
139         String s = sb.toString();
140         Object e = entity.get(s);
141         return e != null ? e : a + s + ";";
142     }
143 
144 
145     /**
146      * Returns the next XML meta token. This is used for skipping over <!...>
147      * and <?...?> structures.
148      * @return Syntax characters (<code>< > / = ! ?</code>) are returned as
149      *  Character, and strings and names are returned as Boolean. We don't care
150      *  what the values actually are.
151      * @throws JSONException If a string is not properly closed or if the XML
152      *  is badly structured.
153      */
nextMeta()154     public Object nextMeta() throws JSONException {
155         char c;
156         char q;
157         do {
158             c = next();
159         } while (Character.isWhitespace(c));
160         switch (c) {
161         case 0:
162             throw syntaxError("Misshaped meta tag");
163         case '<':
164             return XML.LT;
165         case '>':
166             return XML.GT;
167         case '/':
168             return XML.SLASH;
169         case '=':
170             return XML.EQ;
171         case '!':
172             return XML.BANG;
173         case '?':
174             return XML.QUEST;
175         case '"':
176         case '\'':
177             q = c;
178             for (;;) {
179                 c = next();
180                 if (c == 0) {
181                     throw syntaxError("Unterminated string");
182                 }
183                 if (c == q) {
184                     return Boolean.TRUE;
185                 }
186             }
187         default:
188             for (;;) {
189                 c = next();
190                 if (Character.isWhitespace(c)) {
191                     return Boolean.TRUE;
192                 }
193                 switch (c) {
194                 case 0:
195                 case '<':
196                 case '>':
197                 case '/':
198                 case '=':
199                 case '!':
200                 case '?':
201                 case '"':
202                 case '\'':
203                     back();
204                     return Boolean.TRUE;
205                 }
206             }
207         }
208     }
209 
210 
211     /**
212      * Get the next XML Token. These tokens are found inside of angle
213      * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it
214      * may be a string wrapped in single quotes or double quotes, or it may be a
215      * name.
216      * @return a String or a Character.
217      * @throws JSONException If the XML is not well formed.
218      */
nextToken()219     public Object nextToken() throws JSONException {
220         char c;
221         char q;
222         StringBuffer sb;
223         do {
224             c = next();
225         } while (Character.isWhitespace(c));
226         switch (c) {
227         case 0:
228             throw syntaxError("Misshaped element");
229         case '<':
230             throw syntaxError("Misplaced '<'");
231         case '>':
232             return XML.GT;
233         case '/':
234             return XML.SLASH;
235         case '=':
236             return XML.EQ;
237         case '!':
238             return XML.BANG;
239         case '?':
240             return XML.QUEST;
241 
242 // Quoted string
243 
244         case '"':
245         case '\'':
246             q = c;
247             sb = new StringBuffer();
248             for (;;) {
249                 c = next();
250                 if (c == 0) {
251                     throw syntaxError("Unterminated string");
252                 }
253                 if (c == q) {
254                     return sb.toString();
255                 }
256                 if (c == '&') {
257                     sb.append(nextEntity(c));
258                 } else {
259                     sb.append(c);
260                 }
261             }
262         default:
263 
264 // Name
265 
266             sb = new StringBuffer();
267             for (;;) {
268                 sb.append(c);
269                 c = next();
270                 if (Character.isWhitespace(c)) {
271                     return sb.toString();
272                 }
273                 switch (c) {
274                 case 0:
275                 	return sb.toString();
276                 case '>':
277                 case '/':
278                 case '=':
279                 case '!':
280                 case '?':
281                 case '[':
282                 case ']':
283                     back();
284                     return sb.toString();
285                 case '<':
286                 case '"':
287                 case '\'':
288                     throw syntaxError("Bad character in a name");
289                 }
290             }
291         }
292     }
293 
294 
295     /**
296      * Skip characters until past the requested string.
297      * If it is not found, we are left at the end of the source with a result of false.
298      * @param to A string to skip past.
299      * @throws JSONException
300      */
skipPast(String to)301     public boolean skipPast(String to) throws JSONException {
302     	boolean b;
303     	char c;
304     	int i;
305     	int j;
306     	int offset = 0;
307     	int n = to.length();
308         char[] circle = new char[n];
309 
310         /*
311          * First fill the circle buffer with as many characters as are in the
312          * to string. If we reach an early end, bail.
313          */
314 
315     	for (i = 0; i < n; i += 1) {
316     		c = next();
317     		if (c == 0) {
318     			return false;
319     		}
320     		circle[i] = c;
321     	}
322     	/*
323     	 * We will loop, possibly for all of the remaining characters.
324     	 */
325     	for (;;) {
326     		j = offset;
327     		b = true;
328     		/*
329     		 * Compare the circle buffer with the to string.
330     		 */
331     		for (i = 0; i < n; i += 1) {
332     			if (circle[j] != to.charAt(i)) {
333     				b = false;
334     				break;
335     			}
336     			j += 1;
337     			if (j >= n) {
338     				j -= n;
339     			}
340     		}
341     		/*
342     		 * If we exit the loop with b intact, then victory is ours.
343     		 */
344     		if (b) {
345     			return true;
346     		}
347     		/*
348     		 * Get the next character. If there isn't one, then defeat is ours.
349     		 */
350     		c = next();
351     		if (c == 0) {
352     			return false;
353     		}
354     		/*
355     		 * Shove the character in the circle buffer and advance the
356     		 * circle offset. The offset is mod n.
357     		 */
358     		circle[offset] = c;
359     		offset += 1;
360     		if (offset >= n) {
361     			offset -= n;
362     		}
363     	}
364     }
365 }
366