1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.json; 18 19 // Note: this class was written without inspecting the non-free org.json sourcecode. 20 21 /** 22 * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>) 23 * encoded string into the corresponding object. Most clients of 24 * this class will use only need the {@link #JSONTokener(String) constructor} 25 * and {@link #nextValue} method. Example usage: <pre> 26 * String json = "{" 27 * + " \"query\": \"Pizza\", " 28 * + " \"locations\": [ 94043, 90210 ] " 29 * + "}"; 30 * 31 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue(); 32 * String query = object.getString("query"); 33 * JSONArray locations = object.getJSONArray("locations");</pre> 34 * 35 * <p>For best interoperability and performance use JSON that complies with 36 * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons 37 * this parser is lenient, so a successful parse does not indicate that the 38 * input string was valid JSON. All of the following syntax errors will be 39 * ignored: 40 * <ul> 41 * <li>End of line comments starting with {@code //} or {@code #} and ending 42 * with a newline character. 43 * <li>C-style comments starting with {@code /*} and ending with 44 * {@code *}{@code /}. Such comments may not be nested. 45 * <li>Strings that are unquoted or {@code 'single quoted'}. 46 * <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}. 47 * <li>Octal integers prefixed with {@code 0}. 48 * <li>Array elements separated by {@code ;}. 49 * <li>Unnecessary array separators. These are interpreted as if null was the 50 * omitted value. 51 * <li>Key-value pairs separated by {@code =} or {@code =>}. 52 * <li>Key-value pairs separated by {@code ;}. 53 * </ul> 54 * 55 * <p>Each tokener may be used to parse a single JSON string. Instances of this 56 * class are not thread safe. Although this class is nonfinal, it was not 57 * designed for inheritance and should not be subclassed. In particular, 58 * self-use by overrideable methods is not specified. See <i>Effective Java</i> 59 * Item 17, "Design and Document or inheritance or else prohibit it" for further 60 * information. 61 */ 62 public class JSONTokener { 63 64 /** The input JSON. */ 65 private final String in; 66 67 /** 68 * The index of the next character to be returned by {@link #next}. When 69 * the input is exhausted, this equals the input's length. 70 */ 71 private int pos; 72 73 /** 74 * @param in JSON encoded string. Null is not permitted and will yield a 75 * tokener that throws {@code NullPointerExceptions} when methods are 76 * called. 77 */ JSONTokener(String in)78 public JSONTokener(String in) { 79 // consume an optional byte order mark (BOM) if it exists 80 if (in != null && in.startsWith("\ufeff")) { 81 in = in.substring(1); 82 } 83 this.in = in; 84 } 85 86 /** 87 * Returns the next value from the input. 88 * 89 * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean, 90 * Integer, Long, Double or {@link JSONObject#NULL}. 91 * @throws JSONException if the input is malformed. 92 */ nextValue()93 public Object nextValue() throws JSONException { 94 int c = nextCleanInternal(); 95 switch (c) { 96 case -1: 97 throw syntaxError("End of input"); 98 99 case '{': 100 return readObject(); 101 102 case '[': 103 return readArray(); 104 105 case '\'': 106 case '"': 107 return nextString((char) c); 108 109 default: 110 pos--; 111 return readLiteral(); 112 } 113 } 114 nextCleanInternal()115 private int nextCleanInternal() throws JSONException { 116 while (pos < in.length()) { 117 int c = in.charAt(pos++); 118 switch (c) { 119 case '\t': 120 case ' ': 121 case '\n': 122 case '\r': 123 continue; 124 125 case '/': 126 if (pos == in.length()) { 127 return c; 128 } 129 130 char peek = in.charAt(pos); 131 switch (peek) { 132 case '*': 133 // skip a /* c-style comment */ 134 pos++; 135 int commentEnd = in.indexOf("*/", pos); 136 if (commentEnd == -1) { 137 throw syntaxError("Unterminated comment"); 138 } 139 pos = commentEnd + 2; 140 continue; 141 142 case '/': 143 // skip a // end-of-line comment 144 pos++; 145 skipToEndOfLine(); 146 continue; 147 148 default: 149 return c; 150 } 151 152 case '#': 153 /* 154 * Skip a # hash end-of-line comment. The JSON RFC doesn't 155 * specify this behavior, but it's required to parse 156 * existing documents. See http://b/2571423. 157 */ 158 skipToEndOfLine(); 159 continue; 160 161 default: 162 return c; 163 } 164 } 165 166 return -1; 167 } 168 169 /** 170 * Advances the position until after the next newline character. If the line 171 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the 172 * caller. 173 */ skipToEndOfLine()174 private void skipToEndOfLine() { 175 for (; pos < in.length(); pos++) { 176 char c = in.charAt(pos); 177 if (c == '\r' || c == '\n') { 178 pos++; 179 break; 180 } 181 } 182 } 183 184 /** 185 * Returns the string up to but not including {@code quote}, unescaping any 186 * character escape sequences encountered along the way. The opening quote 187 * should have already been read. This consumes the closing quote, but does 188 * not include it in the returned string. 189 * 190 * @param quote either ' or ". 191 */ nextString(char quote)192 public String nextString(char quote) throws JSONException { 193 /* 194 * For strings that are free of escape sequences, we can just extract 195 * the result as a substring of the input. But if we encounter an escape 196 * sequence, we need to use a StringBuilder to compose the result. 197 */ 198 StringBuilder builder = null; 199 200 /* the index of the first character not yet appended to the builder. */ 201 int start = pos; 202 203 while (pos < in.length()) { 204 int c = in.charAt(pos++); 205 if (c == quote) { 206 if (builder == null) { 207 // a new string avoids leaking memory 208 return new String(in.substring(start, pos - 1)); 209 } else { 210 builder.append(in, start, pos - 1); 211 return builder.toString(); 212 } 213 } 214 215 if (c == '\\') { 216 if (pos == in.length()) { 217 throw syntaxError("Unterminated escape sequence"); 218 } 219 if (builder == null) { 220 builder = new StringBuilder(); 221 } 222 builder.append(in, start, pos - 1); 223 builder.append(readEscapeCharacter()); 224 start = pos; 225 } 226 } 227 228 throw syntaxError("Unterminated string"); 229 } 230 231 /** 232 * Unescapes the character identified by the character or characters that 233 * immediately follow a backslash. The backslash '\' should have already 234 * been read. This supports both unicode escapes "u000A" and two-character 235 * escapes "\n". 236 */ readEscapeCharacter()237 private char readEscapeCharacter() throws JSONException { 238 char escaped = in.charAt(pos++); 239 switch (escaped) { 240 case 'u': 241 if (pos + 4 > in.length()) { 242 throw syntaxError("Unterminated escape sequence"); 243 } 244 String hex = in.substring(pos, pos + 4); 245 pos += 4; 246 try { 247 return (char) Integer.parseInt(hex, 16); 248 } catch (NumberFormatException nfe) { 249 throw syntaxError("Invalid escape sequence: " + hex); 250 } 251 252 case 't': 253 return '\t'; 254 255 case 'b': 256 return '\b'; 257 258 case 'n': 259 return '\n'; 260 261 case 'r': 262 return '\r'; 263 264 case 'f': 265 return '\f'; 266 267 case '\'': 268 case '"': 269 case '\\': 270 default: 271 return escaped; 272 } 273 } 274 275 /** 276 * Reads a null, boolean, numeric or unquoted string literal value. Numeric 277 * values will be returned as an Integer, Long, or Double, in that order of 278 * preference. 279 */ readLiteral()280 private Object readLiteral() throws JSONException { 281 String literal = nextToInternal("{}[]/\\:,=;# \t\f"); 282 283 if (literal.length() == 0) { 284 throw syntaxError("Expected literal value"); 285 } else if ("null".equalsIgnoreCase(literal)) { 286 return JSONObject.NULL; 287 } else if ("true".equalsIgnoreCase(literal)) { 288 return Boolean.TRUE; 289 } else if ("false".equalsIgnoreCase(literal)) { 290 return Boolean.FALSE; 291 } 292 293 /* try to parse as an integral type... */ 294 if (literal.indexOf('.') == -1) { 295 int base = 10; 296 String number = literal; 297 if (number.startsWith("0x") || number.startsWith("0X")) { 298 number = number.substring(2); 299 base = 16; 300 } else if (number.startsWith("0") && number.length() > 1) { 301 number = number.substring(1); 302 base = 8; 303 } 304 try { 305 long longValue = Long.parseLong(number, base); 306 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) { 307 return (int) longValue; 308 } else { 309 return longValue; 310 } 311 } catch (NumberFormatException e) { 312 /* 313 * This only happens for integral numbers greater than 314 * Long.MAX_VALUE, numbers in exponential form (5e-10) and 315 * unquoted strings. Fall through to try floating point. 316 */ 317 } 318 } 319 320 /* ...next try to parse as a floating point... */ 321 try { 322 return Double.valueOf(literal); 323 } catch (NumberFormatException ignored) { 324 } 325 326 /* ... finally give up. We have an unquoted string */ 327 return new String(literal); // a new string avoids leaking memory 328 } 329 330 /** 331 * Returns the string up to but not including any of the given characters or 332 * a newline character. This does not consume the excluded character. 333 */ nextToInternal(String excluded)334 private String nextToInternal(String excluded) { 335 int start = pos; 336 for (; pos < in.length(); pos++) { 337 char c = in.charAt(pos); 338 if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) { 339 return in.substring(start, pos); 340 } 341 } 342 return in.substring(start); 343 } 344 345 /** 346 * Reads a sequence of key/value pairs and the trailing closing brace '}' of 347 * an object. The opening brace '{' should have already been read. 348 */ readObject()349 private JSONObject readObject() throws JSONException { 350 JSONObject result = new JSONObject(); 351 352 /* Peek to see if this is the empty object. */ 353 int first = nextCleanInternal(); 354 if (first == '}') { 355 return result; 356 } else if (first != -1) { 357 pos--; 358 } 359 360 while (true) { 361 Object name = nextValue(); 362 if (!(name instanceof String)) { 363 if (name == null) { 364 throw syntaxError("Names cannot be null"); 365 } else { 366 throw syntaxError("Names must be strings, but " + name 367 + " is of type " + name.getClass().getName()); 368 } 369 } 370 371 /* 372 * Expect the name/value separator to be either a colon ':', an 373 * equals sign '=', or an arrow "=>". The last two are bogus but we 374 * include them because that's what the original implementation did. 375 */ 376 int separator = nextCleanInternal(); 377 if (separator != ':' && separator != '=') { 378 throw syntaxError("Expected ':' after " + name); 379 } 380 if (pos < in.length() && in.charAt(pos) == '>') { 381 pos++; 382 } 383 384 result.put((String) name, nextValue()); 385 386 switch (nextCleanInternal()) { 387 case '}': 388 return result; 389 case ';': 390 case ',': 391 continue; 392 default: 393 throw syntaxError("Unterminated object"); 394 } 395 } 396 } 397 398 /** 399 * Reads a sequence of values and the trailing closing brace ']' of an 400 * array. The opening brace '[' should have already been read. Note that 401 * "[]" yields an empty array, but "[,]" returns a two-element array 402 * equivalent to "[null,null]". 403 */ readArray()404 private JSONArray readArray() throws JSONException { 405 JSONArray result = new JSONArray(); 406 407 /* to cover input that ends with ",]". */ 408 boolean hasTrailingSeparator = false; 409 410 while (true) { 411 switch (nextCleanInternal()) { 412 case -1: 413 throw syntaxError("Unterminated array"); 414 case ']': 415 if (hasTrailingSeparator) { 416 result.put(null); 417 } 418 return result; 419 case ',': 420 case ';': 421 /* A separator without a value first means "null". */ 422 result.put(null); 423 hasTrailingSeparator = true; 424 continue; 425 default: 426 pos--; 427 } 428 429 result.put(nextValue()); 430 431 switch (nextCleanInternal()) { 432 case ']': 433 return result; 434 case ',': 435 case ';': 436 hasTrailingSeparator = true; 437 continue; 438 default: 439 throw syntaxError("Unterminated array"); 440 } 441 } 442 } 443 444 /** 445 * Returns an exception containing the given message plus the current 446 * position and the entire input string. 447 */ syntaxError(String message)448 public JSONException syntaxError(String message) { 449 return new JSONException(message + this); 450 } 451 452 /** 453 * Returns the current position and the entire input string. 454 */ toString()455 @Override public String toString() { 456 // consistent with the original implementation 457 return " at character " + pos + " of " + in; 458 } 459 460 /* 461 * Legacy APIs. 462 * 463 * None of the methods below are on the critical path of parsing JSON 464 * documents. They exist only because they were exposed by the original 465 * implementation and may be used by some clients. 466 */ 467 468 /** 469 * Returns true until the input has been exhausted. 470 */ more()471 public boolean more() { 472 return pos < in.length(); 473 } 474 475 /** 476 * Returns the next available character, or the null character '\0' if all 477 * input has been exhausted. The return value of this method is ambiguous 478 * for JSON strings that contain the character '\0'. 479 */ next()480 public char next() { 481 return pos < in.length() ? in.charAt(pos++) : '\0'; 482 } 483 484 /** 485 * Returns the next available character if it equals {@code c}. Otherwise an 486 * exception is thrown. 487 */ next(char c)488 public char next(char c) throws JSONException { 489 char result = next(); 490 if (result != c) { 491 throw syntaxError("Expected " + c + " but was " + result); 492 } 493 return result; 494 } 495 496 /** 497 * Returns the next character that is not whitespace and does not belong to 498 * a comment. If the input is exhausted before such a character can be 499 * found, the null character '\0' is returned. The return value of this 500 * method is ambiguous for JSON strings that contain the character '\0'. 501 */ nextClean()502 public char nextClean() throws JSONException { 503 int nextCleanInt = nextCleanInternal(); 504 return nextCleanInt == -1 ? '\0' : (char) nextCleanInt; 505 } 506 507 /** 508 * Returns the next {@code length} characters of the input. 509 * 510 * <p>The returned string shares its backing character array with this 511 * tokener's input string. If a reference to the returned string may be held 512 * indefinitely, you should use {@code new String(result)} to copy it first 513 * to avoid memory leaks. 514 * 515 * @throws JSONException if the remaining input is not long enough to 516 * satisfy this request. 517 */ next(int length)518 public String next(int length) throws JSONException { 519 if (pos + length > in.length()) { 520 throw syntaxError(length + " is out of bounds"); 521 } 522 String result = in.substring(pos, pos + length); 523 pos += length; 524 return result; 525 } 526 527 /** 528 * Returns the {@link String#trim trimmed} string holding the characters up 529 * to but not including the first of: 530 * <ul> 531 * <li>any character in {@code excluded} 532 * <li>a newline character '\n' 533 * <li>a carriage return '\r' 534 * </ul> 535 * 536 * <p>The returned string shares its backing character array with this 537 * tokener's input string. If a reference to the returned string may be held 538 * indefinitely, you should use {@code new String(result)} to copy it first 539 * to avoid memory leaks. 540 * 541 * @return a possibly-empty string 542 */ nextTo(String excluded)543 public String nextTo(String excluded) { 544 if (excluded == null) { 545 throw new NullPointerException("excluded == null"); 546 } 547 return nextToInternal(excluded).trim(); 548 } 549 550 /** 551 * Equivalent to {@code nextTo(String.valueOf(excluded))}. 552 */ nextTo(char excluded)553 public String nextTo(char excluded) { 554 return nextToInternal(String.valueOf(excluded)).trim(); 555 } 556 557 /** 558 * Advances past all input up to and including the next occurrence of 559 * {@code thru}. If the remaining input doesn't contain {@code thru}, the 560 * input is exhausted. 561 */ skipPast(String thru)562 public void skipPast(String thru) { 563 int thruStart = in.indexOf(thru, pos); 564 pos = thruStart == -1 ? in.length() : (thruStart + thru.length()); 565 } 566 567 /** 568 * Advances past all input up to but not including the next occurrence of 569 * {@code to}. If the remaining input doesn't contain {@code to}, the input 570 * is unchanged. 571 */ skipTo(char to)572 public char skipTo(char to) { 573 int index = in.indexOf(to, pos); 574 if (index != -1) { 575 pos = index; 576 return to; 577 } else { 578 return '\0'; 579 } 580 } 581 582 /** 583 * Unreads the most recent character of input. If no input characters have 584 * been read, the input is unchanged. 585 */ back()586 public void back() { 587 if (--pos == -1) { 588 pos = 0; 589 } 590 } 591 592 /** 593 * Returns the integer [0..15] value for the given hex character, or -1 594 * for non-hex input. 595 * 596 * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other 597 * character will yield a -1 result. 598 */ dehexchar(char hex)599 public static int dehexchar(char hex) { 600 if (hex >= '0' && hex <= '9') { 601 return hex - '0'; 602 } else if (hex >= 'A' && hex <= 'F') { 603 return hex - 'A' + 10; 604 } else if (hex >= 'a' && hex <= 'f') { 605 return hex - 'a' + 10; 606 } else { 607 return -1; 608 } 609 } 610 } 611