/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.util;

import com.android.internal.util.StringPool;

import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;


/**
 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
 * encoded value as a stream of tokens. This stream includes both literal
 * values (strings, numbers, booleans, and nulls) as well as the begin and
 * end delimiters of objects and arrays. The tokens are traversed in
 * depth-first order, the same order that they appear in the JSON document.
 * Within JSON objects, name/value pairs are represented by a single token.
 *
 * <h3>Parsing JSON</h3>
 * To create a recursive descent parser for your own JSON streams, first create
 * an entry point method that creates a {@code JsonReader}.
 *
 * <p>Next, create handler methods for each structure in your JSON text. You'll
 * need a method for each object type and for each array type.
 * <ul>
 *   <li>Within <strong>array handling</strong> methods, first call {@link
 *       #beginArray} to consume the array's opening bracket. Then create a
 *       while loop that accumulates values, terminating when {@link #hasNext}
 *       is false. Finally, read the array's closing bracket by calling {@link
 *       #endArray}.
 *   <li>Within <strong>object handling</strong> methods, first call {@link
 *       #beginObject} to consume the object's opening brace. Then create a
 *       while loop that assigns values to local variables based on their name.
 *       This loop should terminate when {@link #hasNext} is false. Finally,
 *       read the object's closing brace by calling {@link #endObject}.
 * </ul>
 * <p>When a nested object or array is encountered, delegate to the
 * corresponding handler method.
 *
 * <p>When an unknown name is encountered, strict parsers should fail with an
 * exception. Lenient parsers should call {@link #skipValue()} to recursively
 * skip the value's nested tokens, which may otherwise conflict.
 *
 * <p>If a value may be null, you should first check using {@link #peek()}.
 * Null literals can be consumed using either {@link #nextNull()} or {@link
 * #skipValue()}.
 *
 * <h3>Example</h3>
 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
 * [
 *   {
 *     "id": 912345678901,
 *     "text": "How do I read JSON on Android?",
 *     "geo": null,
 *     "user": {
 *       "name": "android_newb",
 *       "followers_count": 41
 *     }
 *   },
 *   {
 *     "id": 912345678902,
 *     "text": "@android_newb just use android.util.JsonReader!",
 *     "geo": [50.454722, -104.606667],
 *     "user": {
 *       "name": "jesse",
 *       "followers_count": 2
 *     }
 *   }
 * ]}</pre>
 * This code implements the parser for the above structure: <pre>   {@code
 *
 *   public List<Message> readJsonStream(InputStream in) throws IOException {
 *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
 *     try {
 *       return readMessagesArray(reader);
 *     } finally {
 *       reader.close();
 *     }
 *   }
 *
 *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
 *     List<Message> messages = new ArrayList<Message>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       messages.add(readMessage(reader));
 *     }
 *     reader.endArray();
 *     return messages;
 *   }
 *
 *   public Message readMessage(JsonReader reader) throws IOException {
 *     long id = -1;
 *     String text = null;
 *     User user = null;
 *     List<Double> geo = null;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("id")) {
 *         id = reader.nextLong();
 *       } else if (name.equals("text")) {
 *         text = reader.nextString();
 *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
 *         geo = readDoublesArray(reader);
 *       } else if (name.equals("user")) {
 *         user = readUser(reader);
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new Message(id, text, user, geo);
 *   }
 *
 *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
 *     List<Double> doubles = new ArrayList<Double>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       doubles.add(reader.nextDouble());
 *     }
 *     reader.endArray();
 *     return doubles;
 *   }
 *
 *   public User readUser(JsonReader reader) throws IOException {
 *     String username = null;
 *     int followersCount = -1;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("name")) {
 *         username = reader.nextString();
 *       } else if (name.equals("followers_count")) {
 *         followersCount = reader.nextInt();
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new User(username, followersCount);
 *   }}</pre>
 *
 * <h3>Number Handling</h3>
 * This reader permits numeric values to be read as strings and string values to
 * be read as numbers. For example, both elements of the JSON array {@code
 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
 * This behavior is intended to prevent lossy numeric conversions: double is
 * JavaScript's only numeric type and very large values like {@code
 * 9007199254740993} cannot be represented exactly on that platform. To minimize
 * precision loss, extremely large values should be written and read as strings
 * in JSON.
 *
 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
 * of this class are not thread safe.
 */
@android.ravenwood.annotation.RavenwoodKeepWholeClass
public final class JsonReader implements Closeable {

    private static final String TRUE = "true";
    private static final String FALSE = "false";

    private final StringPool stringPool = new StringPool();

    /** The input JSON. */
    private final Reader in;

    /** True to accept non-spec compliant JSON */
    private boolean lenient = false;

    /**
     * Use a manual buffer to easily read and unread upcoming characters, and
     * also so we can create strings without an intermediate StringBuilder.
     * We decode literals directly out of this buffer, so it must be at least as
     * long as the longest token that can be reported as a number.
     */
    private final char[] buffer = new char[1024];
    private int pos = 0;
    private int limit = 0;

    /*
     * The offset of the first character in the buffer.
     */
    private int bufferStartLine = 1;
    private int bufferStartColumn = 1;

    /** The nesting scopes currently open; the last element is the innermost scope. */
    private final List<JsonScope> stack = new ArrayList<JsonScope>();
    {
        push(JsonScope.EMPTY_DOCUMENT);
    }

    /**
     * The type of the next token to be returned by {@link #peek} and {@link
     * #advance}. If null, peek() will assign a value.
     */
    private JsonToken token;

    /** The text of the next name. */
    private String name;

    /*
     * For the next literal value, we may have the text value, or the position
     * and length in the buffer.
     */
    private String value;
    private int valuePos;
    private int valueLength;

    /** True if we're currently handling a skipValue() call. */
    private boolean skipping = false;

    /**
     * Creates a new instance that reads a JSON-encoded stream from {@code in}.
     *
     * @param in the source of JSON text; must not be null.
     * @throws NullPointerException if {@code in} is null.
     */
    public JsonReader(Reader in) {
        if (in == null) {
            throw new NullPointerException("in == null");
        }
        this.in = in;
    }

    /**
     * Configure this parser to be liberal in what it accepts. By default,
     * this parser is strict and only accepts JSON as specified by <a
     * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
     * parser to lenient causes it to ignore the following syntax errors:
     *
     * <ul>
     *   <li>End of line comments starting with {@code //} or {@code #} and
     *       ending with a newline character.
     *   <li>C-style comments starting with {@code /*} and ending with
     *       {@code *}{@code /}. Such comments may not be nested.
     *   <li>Names that are unquoted or {@code 'single quoted'}.
     *   <li>Strings that are unquoted or {@code 'single quoted'}.
     *   <li>Array elements separated by {@code ;} instead of {@code ,}.
     *   <li>Unnecessary array separators. These are interpreted as if null
     *       was the omitted value.
     *   <li>Names and values separated by {@code =} or {@code =>} instead of
     *       {@code :}.
     *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
     * </ul>
     */
    public void setLenient(boolean lenient) {
        this.lenient = lenient;
    }

    /**
     * Returns true if this parser is liberal in what it accepts.
     */
    public boolean isLenient() {
        return lenient;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new array.
     */
    public void beginArray() throws IOException {
        expect(JsonToken.BEGIN_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current array.
     */
    public void endArray() throws IOException {
        expect(JsonToken.END_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new object.
     */
    public void beginObject() throws IOException {
        expect(JsonToken.BEGIN_OBJECT);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current object.
     */
    public void endObject() throws IOException {
        expect(JsonToken.END_OBJECT);
    }

    /**
     * Consumes {@code expected}.
     *
     * @throws IllegalStateException if the next token is not {@code expected}.
     */
    private void expect(JsonToken expected) throws IOException {
        peek();
        if (token != expected) {
            throw new IllegalStateException("Expected " + expected + " but was " + peek());
        }
        advance();
    }

    /**
     * Returns true if the current array or object has another element.
     */
    public boolean hasNext() throws IOException {
        peek();
        return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
    }

    /**
     * Returns the type of the next token without consuming it.
     *
     * @throws IllegalStateException if this reader is closed.
     */
    public JsonToken peek() throws IOException {
        if (token != null) {
            return token;
        }

        switch (peekStack()) {
            case EMPTY_DOCUMENT:
                replaceTop(JsonScope.NONEMPTY_DOCUMENT);
                JsonToken firstToken = nextValue();
                // Strict documents must have an array or object at the top level.
                if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
                    throw new IOException(
                            "Expected JSON document to start with '[' or '{' but was " + token);
                }
                return firstToken;
            case EMPTY_ARRAY:
                return nextInArray(true);
            case NONEMPTY_ARRAY:
                return nextInArray(false);
            case EMPTY_OBJECT:
                return nextInObject(true);
            case DANGLING_NAME:
                return objectValue();
            case NONEMPTY_OBJECT:
                return nextInObject(false);
            case NONEMPTY_DOCUMENT:
                try {
                    // Only lenient readers may see further top-level values.
                    JsonToken trailing = nextValue();
                    if (lenient) {
                        return trailing;
                    }
                    throw syntaxError("Expected EOF");
                } catch (EOFException e) {
                    return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
                }
            case CLOSED:
                throw new IllegalStateException("JsonReader is closed");
            default:
                throw new AssertionError();
        }
    }

    /**
     * Advances the cursor in the JSON stream to the next token.
     *
     * @return the token that was consumed.
     */
    private JsonToken advance() throws IOException {
        peek();

        JsonToken result = token;
        token = null;
        value = null;
        name = null;
        return result;
    }

    /**
     * Returns the next token, a {@link JsonToken#NAME property name}, and
     * consumes it.
     *
     * @throws IllegalStateException if the next token in the stream is not a
     *     property name or if this reader is closed.
     */
    public String nextName() throws IOException {
        peek();
        if (token != JsonToken.NAME) {
            throw new IllegalStateException("Expected a name but was " + peek());
        }
        String result = name;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#STRING string} value of the next token,
     * consuming it. If the next token is a number, this method will return its
     * string form.
     *
     * @throws IllegalStateException if the next token is not a string or if
     *     this reader is closed.
     */
    public String nextString() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a string but was " + peek());
        }

        String result = value;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
     * consuming it.
     *
     * @throws IllegalStateException if the next token is not a boolean or if
     *     this reader is closed.
     */
    public boolean nextBoolean() throws IOException {
        peek();
        if (token != JsonToken.BOOLEAN) {
            throw new IllegalStateException("Expected a boolean but was " + token);
        }

        // decodeLiteral() stores the TRUE or FALSE constant for boolean tokens.
        boolean result = TRUE.equals(value);
        advance();
        return result;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is a
     * literal null.
     *
     * @throws IllegalStateException if the next token is not null or if this
     *     reader is closed.
     */
    public void nextNull() throws IOException {
        peek();
        if (token != JsonToken.NULL) {
            throw new IllegalStateException("Expected null but was " + token);
        }

        advance();
    }

    /**
     * Returns the {@link JsonToken#NUMBER double} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a double using {@link Double#parseDouble(String)}.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     */
    public double nextDouble() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a double but was " + token);
        }

        double result = Double.parseDouble(value);
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER long} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a long. If the next token's numeric value cannot be exactly
     * represented by a Java {@code long}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as a long.
     */
    public long nextLong() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a long but was " + token);
        }

        long result;
        try {
            result = Long.parseLong(value);
        } catch (NumberFormatException ignored) {
            // Not plain integer syntax (e.g. "1.0" or "1e3"); accept it only if the
            // double round-trips through long unchanged.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (long) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER int} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as an int. If the next token's numeric value cannot be exactly
     * represented by a Java {@code int}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as an int.
     */
    public int nextInt() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected an int but was " + token);
        }

        int result;
        try {
            result = Integer.parseInt(value);
        } catch (NumberFormatException ignored) {
            // Not plain integer syntax (e.g. "1.0" or "1e3"); accept it only if the
            // double round-trips through int unchanged.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (int) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Closes this JSON reader and the underlying {@link Reader}.
     */
    public void close() throws IOException {
        value = null;
        token = null;
        // Leave CLOSED as the only scope so that later peek() calls fail.
        stack.clear();
        stack.add(JsonScope.CLOSED);
        in.close();
    }

    /**
     * Skips the next value recursively. If it is an object or array, all nested
     * elements are skipped. This method is intended for use when the JSON token
     * stream contains unrecognized or unhandled values.
     *
     * @throws IllegalStateException if there is no element left to skip.
     */
    public void skipValue() throws IOException {
        skipping = true;
        try {
            if (!hasNext() || peek() == JsonToken.END_DOCUMENT) {
                throw new IllegalStateException("No element left to skip");
            }
            // Track nesting depth so that a whole array or object is consumed.
            int count = 0;
            do {
                JsonToken skipped = advance();
                if (skipped == JsonToken.BEGIN_ARRAY || skipped == JsonToken.BEGIN_OBJECT) {
                    count++;
                } else if (skipped == JsonToken.END_ARRAY || skipped == JsonToken.END_OBJECT) {
                    count--;
                }
            } while (count != 0);
        } finally {
            skipping = false;
        }
    }

    /** Returns the innermost scope without removing it. */
    private JsonScope peekStack() {
        return stack.get(stack.size() - 1);
    }

    /** Removes and returns the innermost scope. */
    private JsonScope pop() {
        return stack.remove(stack.size() - 1);
    }

    /** Makes {@code newTop} the innermost scope. */
    private void push(JsonScope newTop) {
        stack.add(newTop);
    }

    /**
     * Replace the value on the top of the stack with the given value.
     */
    private void replaceTop(JsonScope newTop) {
        stack.set(stack.size() - 1, newTop);
    }

    /**
     * Reads the next token inside an array: either the closing bracket or the
     * next element. In lenient mode an omitted element is reported as null.
     *
     * @param firstElement true if no element of this array has been read yet,
     *     in which case no separator is expected before the element.
     */
    private JsonToken nextInArray(boolean firstElement) throws IOException {
        if (firstElement) {
            replaceTop(JsonScope.NONEMPTY_ARRAY);
        } else {
            /* Look for a comma before each element after the first element. */
            switch (nextNonWhitespace()) {
                case ']':
                    pop();
                    return token = JsonToken.END_ARRAY;
                case ';':
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated array");
            }
        }

        switch (nextNonWhitespace()) {
            case ']':
                if (firstElement) {
                    pop();
                    return token = JsonToken.END_ARRAY;
                }
                // fall-through to handle ",]"
            case ';':
            case ',':
                /* In lenient mode, a 0-length literal means 'null' */
                checkLenient();
                pos--;
                value = "null";
                return token = JsonToken.NULL;
            default:
                pos--;
                return nextValue();
        }
    }

    /**
     * Reads the next token inside an object: either the closing brace or the
     * name of the next name/value pair.
     *
     * @param firstElement true if no pair of this object has been read yet,
     *     in which case no separator is expected before the name.
     */
    private JsonToken nextInObject(boolean firstElement) throws IOException {
        /*
         * Read delimiters. Either a comma/semicolon separating this and the
         * previous name-value pair, or a close brace to denote the end of the
         * object.
         */
        if (firstElement) {
            /* Peek to see if this is the empty object. */
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                default:
                    pos--;
            }
        } else {
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                case ';':
                    // A semicolon separator is only valid for lenient readers,
                    // matching the handling of ';' between array elements.
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated object");
            }
        }

        /* Read the name. */
        int quote = nextNonWhitespace();
        switch (quote) {
            case '\'':
                checkLenient(); // fall-through
            case '"':
                name = nextString((char) quote);
                break;
            default:
                checkLenient();
                pos--;
                name = nextLiteral(false);
                if (name.isEmpty()) {
                    throw syntaxError("Expected name");
                }
        }

        replaceTop(JsonScope.DANGLING_NAME);
        return token = JsonToken.NAME;
    }

    /**
     * Reads the separator that follows a property name and then the property's
     * value.
     */
    private JsonToken objectValue() throws IOException {
        /*
         * Read the name/value separator. Usually a colon ':'. In lenient mode
         * we also accept an equals sign '=', or an arrow "=>".
         */
        switch (nextNonWhitespace()) {
            case ':':
                break;
            case '=':
                checkLenient();
                if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
                    pos++;
                }
                break;
            default:
                throw syntaxError("Expected ':'");
        }

        replaceTop(JsonScope.NONEMPTY_OBJECT);
        return nextValue();
    }

    /**
     * Reads the start of the next value: an opening brace or bracket, a quoted
     * string, or an unquoted literal.
     */
    private JsonToken nextValue() throws IOException {
        int c = nextNonWhitespace();
        switch (c) {
            case '{':
                push(JsonScope.EMPTY_OBJECT);
                return token = JsonToken.BEGIN_OBJECT;

            case '[':
                push(JsonScope.EMPTY_ARRAY);
                return token = JsonToken.BEGIN_ARRAY;

            case '\'':
                checkLenient(); // fall-through
            case '"':
                value = nextString((char) c);
                return token = JsonToken.STRING;

            default:
                pos--;
                return readLiteral();
        }
    }

    /**
     * Returns true once {@code limit - pos >= minimum}. If the data is
     * exhausted before that many characters are available, this returns
     * false.
     */
    private boolean fillBuffer(int minimum) throws IOException {
        // Before clobbering the old characters, update where buffer starts
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                bufferStartLine++;
                bufferStartColumn = 1;
            } else {
                bufferStartColumn++;
            }
        }

        // Move the unread characters to the front of the buffer.
        if (limit != pos) {
            limit -= pos;
            System.arraycopy(buffer, pos, buffer, 0, limit);
        } else {
            limit = 0;
        }

        pos = 0;
        int total;
        while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
            limit += total;

            // if this is the first read, consume an optional byte order mark (BOM) if it exists
            if (bufferStartLine == 1 && bufferStartColumn == 1
                    && limit > 0 && buffer[0] == '\ufeff') {
                pos++;
                bufferStartColumn--;
            }

            // Measure from pos rather than from 0: a skipped BOM occupies a slot
            // in the buffer but is not available to the caller.
            if (limit - pos >= minimum) {
                return true;
            }
        }
        return false;
    }

    /** Returns the line number of the current position, for error messages. */
    private int getLineNumber() {
        int result = bufferStartLine;
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                result++;
            }
        }
        return result;
    }

    /** Returns the column number of the current position, for error messages. */
    private int getColumnNumber() {
        int result = bufferStartColumn;
        for (int i = 0; i < pos; i++) {
            if (buffer[i] == '\n') {
                result = 1;
            } else {
                result++;
            }
        }
        return result;
    }

    /**
     * Consumes and returns the next character that is neither whitespace nor
     * part of a comment. Comments are only accepted in lenient mode.
     *
     * @throws EOFException if the input ends before such a character is found.
     */
    private int nextNonWhitespace() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            int c = buffer[pos++];
            switch (c) {
                case '\t':
                case ' ':
                case '\n':
                case '\r':
                    continue;

                case '/':
                    if (pos == limit && !fillBuffer(1)) {
                        return c;
                    }

                    checkLenient();
                    char peek = buffer[pos];
                    switch (peek) {
                        case '*':
                            // skip a /* c-style comment */
                            pos++;
                            if (!skipTo("*/")) {
                                throw syntaxError("Unterminated comment");
                            }
                            pos += 2;
                            continue;

                        case '/':
                            // skip a // end-of-line comment
                            pos++;
                            skipToEndOfLine();
                            continue;

                        default:
                            return c;
                    }

                case '#':
                    /*
                     * Skip a # hash end-of-line comment. The JSON RFC doesn't
                     * specify this behaviour, but it's required to parse
                     * existing documents. See http://b/2571423.
                     */
                    checkLenient();
                    skipToEndOfLine();
                    continue;

                default:
                    return c;
            }
        }

        throw new EOFException("End of input");
    }

    /** Throws a syntax error unless this reader is lenient. */
    private void checkLenient() throws IOException {
        if (!lenient) {
            throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
        }
    }

    /**
     * Advances the position until after the next newline character. If the line
     * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
     * caller.
     */
    private void skipToEndOfLine() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            char c = buffer[pos++];
            if (c == '\r' || c == '\n') {
                break;
            }
        }
    }

    /**
     * Advances the position to the start of the next occurrence of {@code
     * toFind}, without consuming it.
     *
     * @return true if {@code toFind} was found, false if the input ended first.
     */
    private boolean skipTo(String toFind) throws IOException {
        outer:
        for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
            for (int c = 0; c < toFind.length(); c++) {
                if (buffer[pos + c] != toFind.charAt(c)) {
                    continue outer;
                }
            }
            return true;
        }
        return false;
    }

    /**
     * Returns the string up to but not including {@code quote}, unescaping any
     * character escape sequences encountered along the way. The opening quote
     * should have already been read. This consumes the closing quote, but does
     * not include it in the returned string.
     *
     * @param quote either ' or ".
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private String nextString(char quote) throws IOException {
        StringBuilder builder = null;
        do {
            /* the index of the first character not yet appended to the builder. */
            int start = pos;
            while (pos < limit) {
                int c = buffer[pos++];

                if (c == quote) {
                    if (skipping) {
                        return "skipped!";
                    } else if (builder == null) {
                        return stringPool.get(buffer, start, pos - start - 1);
                    } else {
                        builder.append(buffer, start, pos - start - 1);
                        return builder.toString();
                    }

                } else if (c == '\\') {
                    if (builder == null) {
                        builder = new StringBuilder();
                    }
                    builder.append(buffer, start, pos - start - 1);
                    builder.append(readEscapeCharacter());
                    start = pos;
                }
            }

            // The string continues past the buffered data; save what we have
            // before the buffer is refilled.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, start, pos - start);
        } while (fillBuffer(1));

        throw syntaxError("Unterminated string");
    }

    /**
     * Reads the value up to but not including any delimiter characters. This
     * does not consume the delimiter character.
     *
     * @param assignOffsetsOnly true for this method to only set the valuePos
     *     and valueLength fields and return a null result. This only works if
     *     the literal is short; a string is returned otherwise.
     */
    private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
        StringBuilder builder = null;
        valuePos = -1;
        valueLength = 0;
        int i = 0;

        findNonLiteralCharacter:
        while (true) {
            for (; pos + i < limit; i++) {
                switch (buffer[pos + i]) {
                    case '/':
                    case '\\':
                    case ';':
                    case '#':
                    case '=':
                        checkLenient(); // fall-through
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case ':':
                    case ',':
                    case ' ':
                    case '\t':
                    case '\f':
                    case '\r':
                    case '\n':
                        break findNonLiteralCharacter;
                }
            }

            /*
             * Attempt to load the entire literal into the buffer at once. If
             * we run out of input, add a non-literal character at the end so
             * that decoding doesn't need to do bounds checks.
             */
            if (i < buffer.length) {
                if (fillBuffer(i + 1)) {
                    continue;
                } else {
                    buffer[limit] = '\0';
                    break;
                }
            }

            // use a StringBuilder when the value is too long. It must be an unquoted string.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, pos, i);
            valueLength += i;
            pos += i;
            i = 0;
            if (!fillBuffer(1)) {
                break;
            }
        }

        String result;
        if (assignOffsetsOnly && builder == null) {
            valuePos = pos;
            result = null;
        } else if (skipping) {
            result = "skipped!";
        } else if (builder == null) {
            result = stringPool.get(buffer, pos, i);
        } else {
            builder.append(buffer, pos, i);
            result = builder.toString();
        }
        valueLength += i;
        pos += i;
        return result;
    }

    /**
     * Returns this class's simple name followed by a snippet of the buffered
     * input around the current position.
     */
    @Override public String toString() {
        return getClass().getSimpleName() + " near " + getSnippet();
    }

    /**
     * Unescapes the character identified by the character or characters that
     * immediately follow a backslash. The backslash '\' should have already
     * been read. This supports both unicode escapes "u000A" and two-character
     * escapes "\n".
     *
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private char readEscapeCharacter() throws IOException {
        if (pos == limit && !fillBuffer(1)) {
            throw syntaxError("Unterminated escape sequence");
        }

        char escaped = buffer[pos++];
        switch (escaped) {
            case 'u':
                if (pos + 4 > limit && !fillBuffer(4)) {
                    throw syntaxError("Unterminated escape sequence");
                }
                String hex = stringPool.get(buffer, pos, 4);
                pos += 4;
                return (char) Integer.parseInt(hex, 16);

            case 't':
                return '\t';

            case 'b':
                return '\b';

            case 'n':
                return '\n';

            case 'r':
                return '\r';

            case 'f':
                return '\f';

            case '\'':
            case '"':
            case '\\':
            default:
                // Any other escaped character stands for itself.
                return escaped;
        }
    }

    /**
     * Reads a null, boolean, numeric or unquoted string literal value.
     */
    private JsonToken readLiteral() throws IOException {
        value = nextLiteral(true);
        if (valueLength == 0) {
            throw syntaxError("Expected literal value");
        }
        token = decodeLiteral();
        // Unquoted strings are only accepted by lenient readers.
        if (token == JsonToken.STRING) {
            checkLenient();
        }
        return token;
    }

    /**
     * Classifies the literal most recently read by {@link #nextLiteral} and
     * returns its token type. For nulls, booleans and literals held in the
     * buffer this also assigns {@code value}. The keywords null, true and
     * false are matched without regard to case.
     */
    private JsonToken decodeLiteral() throws IOException {
        if (valuePos == -1) {
            // it was too long to fit in the buffer so it can only be a string
            return JsonToken.STRING;
        } else if (valueLength == 4
                && ('n' == buffer[valuePos    ] || 'N' == buffer[valuePos    ])
                && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
            value = "null";
            return JsonToken.NULL;
        } else if (valueLength == 4
                && ('t' == buffer[valuePos    ] || 'T' == buffer[valuePos    ])
                && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
                && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
                && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
            value = TRUE;
            return JsonToken.BOOLEAN;
        } else if (valueLength == 5
                && ('f' == buffer[valuePos    ] || 'F' == buffer[valuePos    ])
                && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
                && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
            value = FALSE;
            return JsonToken.BOOLEAN;
        } else {
            value = stringPool.get(buffer, valuePos, valueLength);
            return decodeNumber(buffer, valuePos, valueLength);
        }
    }

    /**
     * Determine whether the characters are a JSON number. Numbers are of the
     * form -12.34e+56. Fractional and exponential parts are optional. Leading
     * zeroes are not allowed in the integer part, but are allowed in the
     * fraction and in the exponent.
     *
     * <p>This reads one character past the end of the literal, so the literal
     * must be followed in {@code chars} by a character that cannot be part of
     * a number.
     *
     * @return {@link JsonToken#NUMBER} if the whole literal is a number,
     *     {@link JsonToken#STRING} otherwise.
     */
    private JsonToken decodeNumber(char[] chars, int offset, int length) {
        int i = offset;
        int c = chars[i];

        if (c == '-') {
            c = chars[++i];
        }

        if (c == '0') {
            c = chars[++i];
        } else if (c >= '1' && c <= '9') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        } else {
            return JsonToken.STRING;
        }

        if (c == '.') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        }

        if (c == 'e' || c == 'E') {
            c = chars[++i];
            if (c == '+' || c == '-') {
                c = chars[++i];
            }
            if (c >= '0' && c <= '9') {
                c = chars[++i];
                while (c >= '0' && c <= '9') {
                    c = chars[++i];
                }
            } else {
                return JsonToken.STRING;
            }
        }

        // It is a number only if the scan consumed the literal exactly.
        if (i == offset + length) {
            return JsonToken.NUMBER;
        } else {
            return JsonToken.STRING;
        }
    }

    /**
     * Throws a new IO exception with the given message and the line and column
     * of this reader's current position. This never returns normally; the
     * return type only lets callers write {@code throw syntaxError(...)}.
     */
    private IOException syntaxError(String message) throws IOException {
        throw new MalformedJsonException(message
                + " at line " + getLineNumber() + " column " + getColumnNumber());
    }

    /**
     * Returns up to 20 buffered characters before the current position followed
     * by up to 20 buffered characters after it.
     */
    private CharSequence getSnippet() {
        StringBuilder snippet = new StringBuilder();
        int beforePos = Math.min(pos, 20);
        snippet.append(buffer, pos - beforePos, beforePos);
        int afterPos = Math.min(limit - pos, 20);
        snippet.append(buffer, pos, afterPos);
        return snippet;
    }
}