1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.json;
18 
19 import android.compat.annotation.UnsupportedAppUsage;
20 
21 // Note: this class was written without inspecting the non-free org.json sourcecode.
22 
23 /**
24  * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
25  * encoded string into the corresponding object. Most clients of
26  * this class will use only need the {@link #JSONTokener(String) constructor}
27  * and {@link #nextValue} method. Example usage: <pre>
28  * String json = "{"
29  *         + "  \"query\": \"Pizza\", "
30  *         + "  \"locations\": [ 94043, 90210 ] "
31  *         + "}";
32  *
33  * JSONObject object = (JSONObject) new JSONTokener(json).nextValue();
34  * String query = object.getString("query");
35  * JSONArray locations = object.getJSONArray("locations");</pre>
36  *
37  * <p>For best interoperability and performance use JSON that complies with
38  * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons
39  * this parser is lenient, so a successful parse does not indicate that the
40  * input string was valid JSON. All of the following syntax errors will be
41  * ignored:
42  * <ul>
43  *   <li>End of line comments starting with {@code //} or {@code #} and ending
44  *       with a newline character.
45  *   <li>C-style comments starting with {@code /*} and ending with
46  *       {@code *}{@code /}. Such comments may not be nested.
47  *   <li>Strings that are unquoted or {@code 'single quoted'}.
48  *   <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}.
49  *   <li>Octal integers prefixed with {@code 0}.
50  *   <li>Array elements separated by {@code ;}.
51  *   <li>Unnecessary array separators. These are interpreted as if null was the
52  *       omitted value.
53  *   <li>Key-value pairs separated by {@code =} or {@code =>}.
54  *   <li>Key-value pairs separated by {@code ;}.
55  * </ul>
56  *
57  * <p>Each tokener may be used to parse a single JSON string. Instances of this
58  * class are not thread safe. Although this class is nonfinal, it was not
59  * designed for inheritance and should not be subclassed. In particular,
60  * self-use by overrideable methods is not specified. See <i>Effective Java</i>
61  * Item 17, "Design and Document or inheritance or else prohibit it" for further
62  * information.
63  */
64 public class JSONTokener {
65 
66     /** The input JSON. */
67     @UnsupportedAppUsage
68     private final String in;
69 
70     /**
71      * The index of the next character to be returned by {@link #next}. When
72      * the input is exhausted, this equals the input's length.
73      */
74     @UnsupportedAppUsage
75     private int pos;
76 
77     /**
78      * @param in JSON encoded string. Null is not permitted and will yield a
79      *     tokener that throws {@code NullPointerExceptions} when methods are
80      *     called.
81      */
JSONTokener(String in)82     public JSONTokener(String in) {
83         // consume an optional byte order mark (BOM) if it exists
84         if (in != null && in.startsWith("\ufeff")) {
85             in = in.substring(1);
86         }
87         this.in = in;
88     }
89 
90     /**
91      * Returns the next value from the input.
92      *
93      * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean,
94      *     Integer, Long, Double or {@link JSONObject#NULL}.
95      * @throws JSONException if the input is malformed.
96      */
nextValue()97     public Object nextValue() throws JSONException {
98         int c = nextCleanInternal();
99         switch (c) {
100             case -1:
101                 throw syntaxError("End of input");
102 
103             case '{':
104                 return readObject();
105 
106             case '[':
107                 return readArray();
108 
109             case '\'':
110             case '"':
111                 return nextString((char) c);
112 
113             default:
114                 pos--;
115                 return readLiteral();
116         }
117     }
118 
119     @UnsupportedAppUsage
nextCleanInternal()120     private int nextCleanInternal() throws JSONException {
121         while (pos < in.length()) {
122             int c = in.charAt(pos++);
123             switch (c) {
124                 case '\t':
125                 case ' ':
126                 case '\n':
127                 case '\r':
128                     continue;
129 
130                 case '/':
131                     if (pos == in.length()) {
132                         return c;
133                     }
134 
135                     char peek = in.charAt(pos);
136                     switch (peek) {
137                         case '*':
138                             // skip a /* c-style comment */
139                             pos++;
140                             int commentEnd = in.indexOf("*/", pos);
141                             if (commentEnd == -1) {
142                                 throw syntaxError("Unterminated comment");
143                             }
144                             pos = commentEnd + 2;
145                             continue;
146 
147                         case '/':
148                             // skip a // end-of-line comment
149                             pos++;
150                             skipToEndOfLine();
151                             continue;
152 
153                         default:
154                             return c;
155                     }
156 
157                 case '#':
158                     /*
159                      * Skip a # hash end-of-line comment. The JSON RFC doesn't
160                      * specify this behavior, but it's required to parse
161                      * existing documents. See http://b/2571423.
162                      */
163                     skipToEndOfLine();
164                     continue;
165 
166                 default:
167                     return c;
168             }
169         }
170 
171         return -1;
172     }
173 
174     /**
175      * Advances the position until after the next newline character. If the line
176      * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
177      * caller.
178      */
179     @UnsupportedAppUsage
skipToEndOfLine()180     private void skipToEndOfLine() {
181         for (; pos < in.length(); pos++) {
182             char c = in.charAt(pos);
183             if (c == '\r' || c == '\n') {
184                 pos++;
185                 break;
186             }
187         }
188     }
189 
190     /**
191      * Returns the string up to but not including {@code quote}, unescaping any
192      * character escape sequences encountered along the way. The opening quote
193      * should have already been read. This consumes the closing quote, but does
194      * not include it in the returned string.
195      *
196      * @param quote either ' or ".
197      */
nextString(char quote)198     public String nextString(char quote) throws JSONException {
199         /*
200          * For strings that are free of escape sequences, we can just extract
201          * the result as a substring of the input. But if we encounter an escape
202          * sequence, we need to use a StringBuilder to compose the result.
203          */
204         StringBuilder builder = null;
205 
206         /* the index of the first character not yet appended to the builder. */
207         int start = pos;
208 
209         while (pos < in.length()) {
210             int c = in.charAt(pos++);
211             if (c == quote) {
212                 if (builder == null) {
213                     // a new string avoids leaking memory
214                     return new String(in.substring(start, pos - 1));
215                 } else {
216                     builder.append(in, start, pos - 1);
217                     return builder.toString();
218                 }
219             }
220 
221             if (c == '\\') {
222                 if (pos == in.length()) {
223                     throw syntaxError("Unterminated escape sequence");
224                 }
225                 if (builder == null) {
226                     builder = new StringBuilder();
227                 }
228                 builder.append(in, start, pos - 1);
229                 builder.append(readEscapeCharacter());
230                 start = pos;
231             }
232         }
233 
234         throw syntaxError("Unterminated string");
235     }
236 
237     /**
238      * Unescapes the character identified by the character or characters that
239      * immediately follow a backslash. The backslash '\' should have already
240      * been read. This supports both unicode escapes "u000A" and two-character
241      * escapes "\n".
242      */
243     @UnsupportedAppUsage
readEscapeCharacter()244     private char readEscapeCharacter() throws JSONException {
245         char escaped = in.charAt(pos++);
246         switch (escaped) {
247             case 'u':
248                 if (pos + 4 > in.length()) {
249                     throw syntaxError("Unterminated escape sequence");
250                 }
251                 String hex = in.substring(pos, pos + 4);
252                 pos += 4;
253                 try {
254                     return (char) Integer.parseInt(hex, 16);
255                 } catch (NumberFormatException nfe) {
256                     throw syntaxError("Invalid escape sequence: " + hex);
257                 }
258 
259             case 't':
260                 return '\t';
261 
262             case 'b':
263                 return '\b';
264 
265             case 'n':
266                 return '\n';
267 
268             case 'r':
269                 return '\r';
270 
271             case 'f':
272                 return '\f';
273 
274             case '\'':
275             case '"':
276             case '\\':
277             default:
278                 return escaped;
279         }
280     }
281 
282     /**
283      * Reads a null, boolean, numeric or unquoted string literal value. Numeric
284      * values will be returned as an Integer, Long, or Double, in that order of
285      * preference.
286      */
287     @UnsupportedAppUsage
readLiteral()288     private Object readLiteral() throws JSONException {
289         String literal = nextToInternal("{}[]/\\:,=;# \t\f");
290 
291         if (literal.length() == 0) {
292             throw syntaxError("Expected literal value");
293         } else if ("null".equalsIgnoreCase(literal)) {
294             return JSONObject.NULL;
295         } else if ("true".equalsIgnoreCase(literal)) {
296             return Boolean.TRUE;
297         } else if ("false".equalsIgnoreCase(literal)) {
298             return Boolean.FALSE;
299         }
300 
301         /* try to parse as an integral type... */
302         if (literal.indexOf('.') == -1) {
303             int base = 10;
304             String number = literal;
305             if (number.startsWith("0x") || number.startsWith("0X")) {
306                 number = number.substring(2);
307                 base = 16;
308             } else if (number.startsWith("0") && number.length() > 1) {
309                 number = number.substring(1);
310                 base = 8;
311             }
312             try {
313                 long longValue = Long.parseLong(number, base);
314                 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) {
315                     return (int) longValue;
316                 } else {
317                     return longValue;
318                 }
319             } catch (NumberFormatException e) {
320                 /*
321                  * This only happens for integral numbers greater than
322                  * Long.MAX_VALUE, numbers in exponential form (5e-10) and
323                  * unquoted strings. Fall through to try floating point.
324                  */
325             }
326         }
327 
328         /* ...next try to parse as a floating point... */
329         try {
330             return Double.valueOf(literal);
331         } catch (NumberFormatException ignored) {
332         }
333 
334         /* ... finally give up. We have an unquoted string */
335         return new String(literal); // a new string avoids leaking memory
336     }
337 
338     /**
339      * Returns the string up to but not including any of the given characters or
340      * a newline character. This does not consume the excluded character.
341      */
342     @UnsupportedAppUsage
nextToInternal(String excluded)343     private String nextToInternal(String excluded) {
344         int start = pos;
345         for (; pos < in.length(); pos++) {
346             char c = in.charAt(pos);
347             if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) {
348                 return in.substring(start, pos);
349             }
350         }
351         return in.substring(start);
352     }
353 
354     /**
355      * Reads a sequence of key/value pairs and the trailing closing brace '}' of
356      * an object. The opening brace '{' should have already been read.
357      */
358     @UnsupportedAppUsage
readObject()359     private JSONObject readObject() throws JSONException {
360         JSONObject result = new JSONObject();
361 
362         /* Peek to see if this is the empty object. */
363         int first = nextCleanInternal();
364         if (first == '}') {
365             return result;
366         } else if (first != -1) {
367             pos--;
368         }
369 
370         while (true) {
371             Object name = nextValue();
372             if (!(name instanceof String)) {
373                 if (name == null) {
374                     throw syntaxError("Names cannot be null");
375                 } else {
376                     throw syntaxError("Names must be strings, but " + name
377                             + " is of type " + name.getClass().getName());
378                 }
379             }
380 
381             /*
382              * Expect the name/value separator to be either a colon ':', an
383              * equals sign '=', or an arrow "=>". The last two are bogus but we
384              * include them because that's what the original implementation did.
385              */
386             int separator = nextCleanInternal();
387             if (separator != ':' && separator != '=') {
388                 throw syntaxError("Expected ':' after " + name);
389             }
390             if (pos < in.length() && in.charAt(pos) == '>') {
391                 pos++;
392             }
393 
394             result.put((String) name, nextValue());
395 
396             switch (nextCleanInternal()) {
397                 case '}':
398                     return result;
399                 case ';':
400                 case ',':
401                     continue;
402                 default:
403                     throw syntaxError("Unterminated object");
404             }
405         }
406     }
407 
408     /**
409      * Reads a sequence of values and the trailing closing brace ']' of an
410      * array. The opening brace '[' should have already been read. Note that
411      * "[]" yields an empty array, but "[,]" returns a two-element array
412      * equivalent to "[null,null]".
413      */
414     @UnsupportedAppUsage
readArray()415     private JSONArray readArray() throws JSONException {
416         JSONArray result = new JSONArray();
417 
418         /* to cover input that ends with ",]". */
419         boolean hasTrailingSeparator = false;
420 
421         while (true) {
422             switch (nextCleanInternal()) {
423                 case -1:
424                     throw syntaxError("Unterminated array");
425                 case ']':
426                     if (hasTrailingSeparator) {
427                         result.put(null);
428                     }
429                     return result;
430                 case ',':
431                 case ';':
432                     /* A separator without a value first means "null". */
433                     result.put(null);
434                     hasTrailingSeparator = true;
435                     continue;
436                 default:
437                     pos--;
438             }
439 
440             result.put(nextValue());
441 
442             switch (nextCleanInternal()) {
443                 case ']':
444                     return result;
445                 case ',':
446                 case ';':
447                     hasTrailingSeparator = true;
448                     continue;
449                 default:
450                     throw syntaxError("Unterminated array");
451             }
452         }
453     }
454 
455     /**
456      * Returns an exception containing the given message plus the current
457      * position and the entire input string.
458      */
syntaxError(String message)459     public JSONException syntaxError(String message) {
460         return new JSONException(message + this);
461     }
462 
463     /**
464      * Returns the current position and the entire input string.
465      */
toString()466     @Override public String toString() {
467         // consistent with the original implementation
468         return " at character " + pos + " of " + in;
469     }
470 
471     /*
472      * Legacy APIs.
473      *
474      * None of the methods below are on the critical path of parsing JSON
475      * documents. They exist only because they were exposed by the original
476      * implementation and may be used by some clients.
477      */
478 
479     /**
480      * Returns true until the input has been exhausted.
481      */
more()482     public boolean more() {
483         return pos < in.length();
484     }
485 
486     /**
487      * Returns the next available character, or the null character '\0' if all
488      * input has been exhausted. The return value of this method is ambiguous
489      * for JSON strings that contain the character '\0'.
490      */
next()491     public char next() {
492         return pos < in.length() ? in.charAt(pos++) : '\0';
493     }
494 
495     /**
496      * Returns the next available character if it equals {@code c}. Otherwise an
497      * exception is thrown.
498      */
next(char c)499     public char next(char c) throws JSONException {
500         char result = next();
501         if (result != c) {
502             throw syntaxError("Expected " + c + " but was " + result);
503         }
504         return result;
505     }
506 
507     /**
508      * Returns the next character that is not whitespace and does not belong to
509      * a comment. If the input is exhausted before such a character can be
510      * found, the null character '\0' is returned. The return value of this
511      * method is ambiguous for JSON strings that contain the character '\0'.
512      */
nextClean()513     public char nextClean() throws JSONException {
514         int nextCleanInt = nextCleanInternal();
515         return nextCleanInt == -1 ? '\0' : (char) nextCleanInt;
516     }
517 
518     /**
519      * Returns the next {@code length} characters of the input.
520      *
521      * <p>The returned string shares its backing character array with this
522      * tokener's input string. If a reference to the returned string may be held
523      * indefinitely, you should use {@code new String(result)} to copy it first
524      * to avoid memory leaks.
525      *
526      * @throws JSONException if the remaining input is not long enough to
527      *     satisfy this request.
528      */
next(int length)529     public String next(int length) throws JSONException {
530         if (pos + length > in.length()) {
531             throw syntaxError(length + " is out of bounds");
532         }
533         String result = in.substring(pos, pos + length);
534         pos += length;
535         return result;
536     }
537 
538     /**
539      * Returns the {@link String#trim trimmed} string holding the characters up
540      * to but not including the first of:
541      * <ul>
542      *   <li>any character in {@code excluded}
543      *   <li>a newline character '\n'
544      *   <li>a carriage return '\r'
545      * </ul>
546      *
547      * <p>The returned string shares its backing character array with this
548      * tokener's input string. If a reference to the returned string may be held
549      * indefinitely, you should use {@code new String(result)} to copy it first
550      * to avoid memory leaks.
551      *
552      * @return a possibly-empty string
553      */
nextTo(String excluded)554     public String nextTo(String excluded) {
555         if (excluded == null) {
556             throw new NullPointerException("excluded == null");
557         }
558         return nextToInternal(excluded).trim();
559     }
560 
561     /**
562      * Equivalent to {@code nextTo(String.valueOf(excluded))}.
563      */
nextTo(char excluded)564     public String nextTo(char excluded) {
565         return nextToInternal(String.valueOf(excluded)).trim();
566     }
567 
568     /**
569      * Advances past all input up to and including the next occurrence of
570      * {@code thru}. If the remaining input doesn't contain {@code thru}, the
571      * input is exhausted.
572      */
skipPast(String thru)573     public void skipPast(String thru) {
574         int thruStart = in.indexOf(thru, pos);
575         pos = thruStart == -1 ? in.length() : (thruStart + thru.length());
576     }
577 
578     /**
579      * Advances past all input up to but not including the next occurrence of
580      * {@code to}. If the remaining input doesn't contain {@code to}, the input
581      * is unchanged.
582      */
skipTo(char to)583     public char skipTo(char to) {
584         int index = in.indexOf(to, pos);
585         if (index != -1) {
586             pos = index;
587             return to;
588         } else {
589             return '\0';
590         }
591     }
592 
593     /**
594      * Unreads the most recent character of input. If no input characters have
595      * been read, the input is unchanged.
596      */
back()597     public void back() {
598         if (--pos == -1) {
599             pos = 0;
600         }
601     }
602 
603     /**
604      * Returns the integer [0..15] value for the given hex character, or -1
605      * for non-hex input.
606      *
607      * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other
608      *     character will yield a -1 result.
609      */
dehexchar(char hex)610     public static int dehexchar(char hex) {
611         if (hex >= '0' && hex <= '9') {
612             return hex - '0';
613         } else if (hex >= 'A' && hex <= 'F') {
614             return hex - 'A' + 10;
615         } else if (hex >= 'a' && hex <= 'f') {
616             return hex - 'a' + 10;
617         } else {
618             return -1;
619         }
620     }
621 }
622