1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.json; 18 19 import android.compat.annotation.UnsupportedAppUsage; 20 21 // Note: this class was written without inspecting the non-free org.json sourcecode. 22 23 /** 24 * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>) 25 * encoded string into the corresponding object. Most clients of 26 * this class will use only need the {@link #JSONTokener(String) constructor} 27 * and {@link #nextValue} method. Example usage: <pre> 28 * String json = "{" 29 * + " \"query\": \"Pizza\", " 30 * + " \"locations\": [ 94043, 90210 ] " 31 * + "}"; 32 * 33 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue(); 34 * String query = object.getString("query"); 35 * JSONArray locations = object.getJSONArray("locations");</pre> 36 * 37 * <p>For best interoperability and performance use JSON that complies with 38 * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons 39 * this parser is lenient, so a successful parse does not indicate that the 40 * input string was valid JSON. All of the following syntax errors will be 41 * ignored: 42 * <ul> 43 * <li>End of line comments starting with {@code //} or {@code #} and ending 44 * with a newline character. 45 * <li>C-style comments starting with {@code /*} and ending with 46 * {@code *}{@code /}. Such comments may not be nested. 47 * <li>Strings that are unquoted or {@code 'single quoted'}. 48 * <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}. 49 * <li>Octal integers prefixed with {@code 0}. 50 * <li>Array elements separated by {@code ;}. 51 * <li>Unnecessary array separators. These are interpreted as if null was the 52 * omitted value. 53 * <li>Key-value pairs separated by {@code =} or {@code =>}. 54 * <li>Key-value pairs separated by {@code ;}. 55 * </ul> 56 * 57 * <p>Each tokener may be used to parse a single JSON string. Instances of this 58 * class are not thread safe. Although this class is nonfinal, it was not 59 * designed for inheritance and should not be subclassed. In particular, 60 * self-use by overrideable methods is not specified. See <i>Effective Java</i> 61 * Item 17, "Design and Document or inheritance or else prohibit it" for further 62 * information. 63 */ 64 public class JSONTokener { 65 66 /** The input JSON. */ 67 @UnsupportedAppUsage 68 private final String in; 69 70 /** 71 * The index of the next character to be returned by {@link #next}. When 72 * the input is exhausted, this equals the input's length. 73 */ 74 @UnsupportedAppUsage 75 private int pos; 76 77 /** 78 * @param in JSON encoded string. Null is not permitted and will yield a 79 * tokener that throws {@code NullPointerExceptions} when methods are 80 * called. 81 */ JSONTokener(String in)82 public JSONTokener(String in) { 83 // consume an optional byte order mark (BOM) if it exists 84 if (in != null && in.startsWith("\ufeff")) { 85 in = in.substring(1); 86 } 87 this.in = in; 88 } 89 90 /** 91 * Returns the next value from the input. 92 * 93 * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean, 94 * Integer, Long, Double or {@link JSONObject#NULL}. 95 * @throws JSONException if the input is malformed. 96 */ nextValue()97 public Object nextValue() throws JSONException { 98 int c = nextCleanInternal(); 99 switch (c) { 100 case -1: 101 throw syntaxError("End of input"); 102 103 case '{': 104 return readObject(); 105 106 case '[': 107 return readArray(); 108 109 case '\'': 110 case '"': 111 return nextString((char) c); 112 113 default: 114 pos--; 115 return readLiteral(); 116 } 117 } 118 119 @UnsupportedAppUsage nextCleanInternal()120 private int nextCleanInternal() throws JSONException { 121 while (pos < in.length()) { 122 int c = in.charAt(pos++); 123 switch (c) { 124 case '\t': 125 case ' ': 126 case '\n': 127 case '\r': 128 continue; 129 130 case '/': 131 if (pos == in.length()) { 132 return c; 133 } 134 135 char peek = in.charAt(pos); 136 switch (peek) { 137 case '*': 138 // skip a /* c-style comment */ 139 pos++; 140 int commentEnd = in.indexOf("*/", pos); 141 if (commentEnd == -1) { 142 throw syntaxError("Unterminated comment"); 143 } 144 pos = commentEnd + 2; 145 continue; 146 147 case '/': 148 // skip a // end-of-line comment 149 pos++; 150 skipToEndOfLine(); 151 continue; 152 153 default: 154 return c; 155 } 156 157 case '#': 158 /* 159 * Skip a # hash end-of-line comment. The JSON RFC doesn't 160 * specify this behavior, but it's required to parse 161 * existing documents. See http://b/2571423. 162 */ 163 skipToEndOfLine(); 164 continue; 165 166 default: 167 return c; 168 } 169 } 170 171 return -1; 172 } 173 174 /** 175 * Advances the position until after the next newline character. If the line 176 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the 177 * caller. 178 */ 179 @UnsupportedAppUsage skipToEndOfLine()180 private void skipToEndOfLine() { 181 for (; pos < in.length(); pos++) { 182 char c = in.charAt(pos); 183 if (c == '\r' || c == '\n') { 184 pos++; 185 break; 186 } 187 } 188 } 189 190 /** 191 * Returns the string up to but not including {@code quote}, unescaping any 192 * character escape sequences encountered along the way. The opening quote 193 * should have already been read. This consumes the closing quote, but does 194 * not include it in the returned string. 195 * 196 * @param quote either ' or ". 197 */ nextString(char quote)198 public String nextString(char quote) throws JSONException { 199 /* 200 * For strings that are free of escape sequences, we can just extract 201 * the result as a substring of the input. But if we encounter an escape 202 * sequence, we need to use a StringBuilder to compose the result. 203 */ 204 StringBuilder builder = null; 205 206 /* the index of the first character not yet appended to the builder. */ 207 int start = pos; 208 209 while (pos < in.length()) { 210 int c = in.charAt(pos++); 211 if (c == quote) { 212 if (builder == null) { 213 // a new string avoids leaking memory 214 return new String(in.substring(start, pos - 1)); 215 } else { 216 builder.append(in, start, pos - 1); 217 return builder.toString(); 218 } 219 } 220 221 if (c == '\\') { 222 if (pos == in.length()) { 223 throw syntaxError("Unterminated escape sequence"); 224 } 225 if (builder == null) { 226 builder = new StringBuilder(); 227 } 228 builder.append(in, start, pos - 1); 229 builder.append(readEscapeCharacter()); 230 start = pos; 231 } 232 } 233 234 throw syntaxError("Unterminated string"); 235 } 236 237 /** 238 * Unescapes the character identified by the character or characters that 239 * immediately follow a backslash. The backslash '\' should have already 240 * been read. This supports both unicode escapes "u000A" and two-character 241 * escapes "\n". 242 */ 243 @UnsupportedAppUsage readEscapeCharacter()244 private char readEscapeCharacter() throws JSONException { 245 char escaped = in.charAt(pos++); 246 switch (escaped) { 247 case 'u': 248 if (pos + 4 > in.length()) { 249 throw syntaxError("Unterminated escape sequence"); 250 } 251 String hex = in.substring(pos, pos + 4); 252 pos += 4; 253 try { 254 return (char) Integer.parseInt(hex, 16); 255 } catch (NumberFormatException nfe) { 256 throw syntaxError("Invalid escape sequence: " + hex); 257 } 258 259 case 't': 260 return '\t'; 261 262 case 'b': 263 return '\b'; 264 265 case 'n': 266 return '\n'; 267 268 case 'r': 269 return '\r'; 270 271 case 'f': 272 return '\f'; 273 274 case '\'': 275 case '"': 276 case '\\': 277 default: 278 return escaped; 279 } 280 } 281 282 /** 283 * Reads a null, boolean, numeric or unquoted string literal value. Numeric 284 * values will be returned as an Integer, Long, or Double, in that order of 285 * preference. 286 */ 287 @UnsupportedAppUsage readLiteral()288 private Object readLiteral() throws JSONException { 289 String literal = nextToInternal("{}[]/\\:,=;# \t\f"); 290 291 if (literal.length() == 0) { 292 throw syntaxError("Expected literal value"); 293 } else if ("null".equalsIgnoreCase(literal)) { 294 return JSONObject.NULL; 295 } else if ("true".equalsIgnoreCase(literal)) { 296 return Boolean.TRUE; 297 } else if ("false".equalsIgnoreCase(literal)) { 298 return Boolean.FALSE; 299 } 300 301 /* try to parse as an integral type... */ 302 if (literal.indexOf('.') == -1) { 303 int base = 10; 304 String number = literal; 305 if (number.startsWith("0x") || number.startsWith("0X")) { 306 number = number.substring(2); 307 base = 16; 308 } else if (number.startsWith("0") && number.length() > 1) { 309 number = number.substring(1); 310 base = 8; 311 } 312 try { 313 long longValue = Long.parseLong(number, base); 314 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) { 315 return (int) longValue; 316 } else { 317 return longValue; 318 } 319 } catch (NumberFormatException e) { 320 /* 321 * This only happens for integral numbers greater than 322 * Long.MAX_VALUE, numbers in exponential form (5e-10) and 323 * unquoted strings. Fall through to try floating point. 324 */ 325 } 326 } 327 328 /* ...next try to parse as a floating point... */ 329 try { 330 return Double.valueOf(literal); 331 } catch (NumberFormatException ignored) { 332 } 333 334 /* ... finally give up. We have an unquoted string */ 335 return new String(literal); // a new string avoids leaking memory 336 } 337 338 /** 339 * Returns the string up to but not including any of the given characters or 340 * a newline character. This does not consume the excluded character. 341 */ 342 @UnsupportedAppUsage nextToInternal(String excluded)343 private String nextToInternal(String excluded) { 344 int start = pos; 345 for (; pos < in.length(); pos++) { 346 char c = in.charAt(pos); 347 if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) { 348 return in.substring(start, pos); 349 } 350 } 351 return in.substring(start); 352 } 353 354 /** 355 * Reads a sequence of key/value pairs and the trailing closing brace '}' of 356 * an object. The opening brace '{' should have already been read. 357 */ 358 @UnsupportedAppUsage readObject()359 private JSONObject readObject() throws JSONException { 360 JSONObject result = new JSONObject(); 361 362 /* Peek to see if this is the empty object. */ 363 int first = nextCleanInternal(); 364 if (first == '}') { 365 return result; 366 } else if (first != -1) { 367 pos--; 368 } 369 370 while (true) { 371 Object name = nextValue(); 372 if (!(name instanceof String)) { 373 if (name == null) { 374 throw syntaxError("Names cannot be null"); 375 } else { 376 throw syntaxError("Names must be strings, but " + name 377 + " is of type " + name.getClass().getName()); 378 } 379 } 380 381 /* 382 * Expect the name/value separator to be either a colon ':', an 383 * equals sign '=', or an arrow "=>". The last two are bogus but we 384 * include them because that's what the original implementation did. 385 */ 386 int separator = nextCleanInternal(); 387 if (separator != ':' && separator != '=') { 388 throw syntaxError("Expected ':' after " + name); 389 } 390 if (pos < in.length() && in.charAt(pos) == '>') { 391 pos++; 392 } 393 394 result.put((String) name, nextValue()); 395 396 switch (nextCleanInternal()) { 397 case '}': 398 return result; 399 case ';': 400 case ',': 401 continue; 402 default: 403 throw syntaxError("Unterminated object"); 404 } 405 } 406 } 407 408 /** 409 * Reads a sequence of values and the trailing closing brace ']' of an 410 * array. The opening brace '[' should have already been read. Note that 411 * "[]" yields an empty array, but "[,]" returns a two-element array 412 * equivalent to "[null,null]". 413 */ 414 @UnsupportedAppUsage readArray()415 private JSONArray readArray() throws JSONException { 416 JSONArray result = new JSONArray(); 417 418 /* to cover input that ends with ",]". */ 419 boolean hasTrailingSeparator = false; 420 421 while (true) { 422 switch (nextCleanInternal()) { 423 case -1: 424 throw syntaxError("Unterminated array"); 425 case ']': 426 if (hasTrailingSeparator) { 427 result.put(null); 428 } 429 return result; 430 case ',': 431 case ';': 432 /* A separator without a value first means "null". */ 433 result.put(null); 434 hasTrailingSeparator = true; 435 continue; 436 default: 437 pos--; 438 } 439 440 result.put(nextValue()); 441 442 switch (nextCleanInternal()) { 443 case ']': 444 return result; 445 case ',': 446 case ';': 447 hasTrailingSeparator = true; 448 continue; 449 default: 450 throw syntaxError("Unterminated array"); 451 } 452 } 453 } 454 455 /** 456 * Returns an exception containing the given message plus the current 457 * position and the entire input string. 458 */ syntaxError(String message)459 public JSONException syntaxError(String message) { 460 return new JSONException(message + this); 461 } 462 463 /** 464 * Returns the current position and the entire input string. 465 */ toString()466 @Override public String toString() { 467 // consistent with the original implementation 468 return " at character " + pos + " of " + in; 469 } 470 471 /* 472 * Legacy APIs. 473 * 474 * None of the methods below are on the critical path of parsing JSON 475 * documents. They exist only because they were exposed by the original 476 * implementation and may be used by some clients. 477 */ 478 479 /** 480 * Returns true until the input has been exhausted. 481 */ more()482 public boolean more() { 483 return pos < in.length(); 484 } 485 486 /** 487 * Returns the next available character, or the null character '\0' if all 488 * input has been exhausted. The return value of this method is ambiguous 489 * for JSON strings that contain the character '\0'. 490 */ next()491 public char next() { 492 return pos < in.length() ? in.charAt(pos++) : '\0'; 493 } 494 495 /** 496 * Returns the next available character if it equals {@code c}. Otherwise an 497 * exception is thrown. 498 */ next(char c)499 public char next(char c) throws JSONException { 500 char result = next(); 501 if (result != c) { 502 throw syntaxError("Expected " + c + " but was " + result); 503 } 504 return result; 505 } 506 507 /** 508 * Returns the next character that is not whitespace and does not belong to 509 * a comment. If the input is exhausted before such a character can be 510 * found, the null character '\0' is returned. The return value of this 511 * method is ambiguous for JSON strings that contain the character '\0'. 512 */ nextClean()513 public char nextClean() throws JSONException { 514 int nextCleanInt = nextCleanInternal(); 515 return nextCleanInt == -1 ? '\0' : (char) nextCleanInt; 516 } 517 518 /** 519 * Returns the next {@code length} characters of the input. 520 * 521 * <p>The returned string shares its backing character array with this 522 * tokener's input string. If a reference to the returned string may be held 523 * indefinitely, you should use {@code new String(result)} to copy it first 524 * to avoid memory leaks. 525 * 526 * @throws JSONException if the remaining input is not long enough to 527 * satisfy this request. 528 */ next(int length)529 public String next(int length) throws JSONException { 530 if (pos + length > in.length()) { 531 throw syntaxError(length + " is out of bounds"); 532 } 533 String result = in.substring(pos, pos + length); 534 pos += length; 535 return result; 536 } 537 538 /** 539 * Returns the {@link String#trim trimmed} string holding the characters up 540 * to but not including the first of: 541 * <ul> 542 * <li>any character in {@code excluded} 543 * <li>a newline character '\n' 544 * <li>a carriage return '\r' 545 * </ul> 546 * 547 * <p>The returned string shares its backing character array with this 548 * tokener's input string. If a reference to the returned string may be held 549 * indefinitely, you should use {@code new String(result)} to copy it first 550 * to avoid memory leaks. 551 * 552 * @return a possibly-empty string 553 */ nextTo(String excluded)554 public String nextTo(String excluded) { 555 if (excluded == null) { 556 throw new NullPointerException("excluded == null"); 557 } 558 return nextToInternal(excluded).trim(); 559 } 560 561 /** 562 * Equivalent to {@code nextTo(String.valueOf(excluded))}. 563 */ nextTo(char excluded)564 public String nextTo(char excluded) { 565 return nextToInternal(String.valueOf(excluded)).trim(); 566 } 567 568 /** 569 * Advances past all input up to and including the next occurrence of 570 * {@code thru}. If the remaining input doesn't contain {@code thru}, the 571 * input is exhausted. 572 */ skipPast(String thru)573 public void skipPast(String thru) { 574 int thruStart = in.indexOf(thru, pos); 575 pos = thruStart == -1 ? in.length() : (thruStart + thru.length()); 576 } 577 578 /** 579 * Advances past all input up to but not including the next occurrence of 580 * {@code to}. If the remaining input doesn't contain {@code to}, the input 581 * is unchanged. 582 */ skipTo(char to)583 public char skipTo(char to) { 584 int index = in.indexOf(to, pos); 585 if (index != -1) { 586 pos = index; 587 return to; 588 } else { 589 return '\0'; 590 } 591 } 592 593 /** 594 * Unreads the most recent character of input. If no input characters have 595 * been read, the input is unchanged. 596 */ back()597 public void back() { 598 if (--pos == -1) { 599 pos = 0; 600 } 601 } 602 603 /** 604 * Returns the integer [0..15] value for the given hex character, or -1 605 * for non-hex input. 606 * 607 * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other 608 * character will yield a -1 result. 609 */ dehexchar(char hex)610 public static int dehexchar(char hex) { 611 if (hex >= '0' && hex <= '9') { 612 return hex - '0'; 613 } else if (hex >= 'A' && hex <= 'F') { 614 return hex - 'A' + 10; 615 } else if (hex >= 'a' && hex <= 'f') { 616 return hex - 'a' + 10; 617 } else { 618 return -1; 619 } 620 } 621 } 622