1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.json.stream; 18 19 import java.io.IOException; 20 import java.io.Reader; 21 import java.io.Closeable; 22 import java.util.ArrayList; 23 import java.util.List; 24 25 /** 26 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>) 27 * encoded value as a stream of tokens. This stream includes both literal 28 * values (strings, numbers, booleans, and nulls) as well as the begin and 29 * end delimiters of objects and arrays. The tokens are traversed in 30 * depth-first order, the same order that they appear in the JSON document. 31 * Within JSON objects, name/value pairs are represented by a single token. 32 * 33 * <h3>Parsing JSON</h3> 34 * To create a recursive descent parser for your own JSON streams, first create 35 * an entry point method that creates a {@code JsonReader}. 36 * 37 * <p>Next, create handler methods for each structure in your JSON text. You'll 38 * need a method for each object type and for each array type. 39 * <ul> 40 * <li>Within <strong>array handling</strong> methods, first call {@link 41 * #beginArray} to consume the array's opening bracket. Then create a 42 * while loop that accumulates values, terminating when {@link #hasNext} 43 * is false. Finally, read the array's closing bracket by calling {@link 44 * #endArray}. 45 * <li>Within <strong>object handling</strong> methods, first call {@link 46 * #beginObject} to consume the object's opening brace. Then create a 47 * while loop that assigns values to local variables based on their name. 48 * This loop should terminate when {@link #hasNext} is false. Finally, 49 * read the object's closing brace by calling {@link #endObject}. 50 * </ul> 51 * <p>When a nested object or array is encountered, delegate to the 52 * corresponding handler method. 53 * 54 * <p>When an unknown name is encountered, strict parsers should fail with an 55 * exception. Lenient parsers should call {@link #skipValue()} to recursively 56 * skip the value's nested tokens, which may otherwise conflict. 57 * 58 * <p>If a value may be null, you should first check using {@link #peek()}. 59 * Null literals can be consumed using either {@link #nextNull()} or {@link 60 * #skipValue()}. 61 * 62 * <h3>Example</h3> 63 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code 64 * [ 65 * { 66 * "id": 912345678901, 67 * "text": "How do I read JSON on Android?", 68 * "geo": null, 69 * "user": { 70 * "name": "android_newb", 71 * "followers_count": 41 72 * } 73 * }, 74 * { 75 * "id": 912345678902, 76 * "text": "@android_newb just use android.util.JsonReader!", 77 * "geo": [50.454722, -104.606667], 78 * "user": { 79 * "name": "jesse", 80 * "followers_count": 2 81 * } 82 * } 83 * ]}</pre> 84 * This code implements the parser for the above structure: <pre> {@code 85 * 86 * public List<Message> readJsonStream(InputStream in) throws IOException { 87 * JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8")); 88 * return readMessagesArray(reader); 89 * } 90 * 91 * public List<Message> readMessagesArray(JsonReader reader) throws IOException { 92 * List<Message> messages = new ArrayList<Message>(); 93 * 94 * reader.beginArray(); 95 * while (reader.hasNext()) { 96 * messages.add(readMessage(reader)); 97 * } 98 * reader.endArray(); 99 * return messages; 100 * } 101 * 102 * public Message readMessage(JsonReader reader) throws IOException { 103 * long id = -1; 104 * String text = null; 105 * User user = null; 106 * List<Double> geo = null; 107 * 108 * reader.beginObject(); 109 * while (reader.hasNext()) { 110 * String name = reader.nextName(); 111 * if (name.equals("id")) { 112 * id = reader.nextLong(); 113 * } else if (name.equals("text")) { 114 * text = reader.nextString(); 115 * } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) { 116 * geo = readDoublesArray(reader); 117 * } else if (name.equals("user")) { 118 * user = readUser(reader); 119 * } else { 120 * reader.skipValue(); 121 * } 122 * } 123 * reader.endObject(); 124 * return new Message(id, text, user, geo); 125 * } 126 * 127 * public List<Double> readDoublesArray(JsonReader reader) throws IOException { 128 * List<Double> doubles = new ArrayList<Double>(); 129 * 130 * reader.beginArray(); 131 * while (reader.hasNext()) { 132 * doubles.add(reader.nextDouble()); 133 * } 134 * reader.endArray(); 135 * return doubles; 136 * } 137 * 138 * public User readUser(JsonReader reader) throws IOException { 139 * String username = null; 140 * int followersCount = -1; 141 * 142 * reader.beginObject(); 143 * while (reader.hasNext()) { 144 * String name = reader.nextName(); 145 * if (name.equals("name")) { 146 * username = reader.nextString(); 147 * } else if (name.equals("followers_count")) { 148 * followersCount = reader.nextInt(); 149 * } else { 150 * reader.skipValue(); 151 * } 152 * } 153 * reader.endObject(); 154 * return new User(username, followersCount); 155 * }}</pre> 156 * 157 * <h3>Number Handling</h3> 158 * This reader permits numeric values to be read as strings and string values to 159 * be read as numbers. For example, both elements of the JSON array {@code 160 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}. 161 * This behavior is intended to prevent lossy numeric conversions: double is 162 * JavaScript's only numeric type and very large values like {@code 163 * 9007199254740993} cannot be represented exactly on that platform. To minimize 164 * precision loss, extremely large values should be written and read as strings 165 * in JSON. 166 * 167 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances 168 * of this class are not thread safe. 169 */ 170 public final class JsonReader implements Closeable { 171 172 private static final String TRUE = "true"; 173 private static final String FALSE = "false"; 174 175 /** The input JSON. */ 176 private final Reader in; 177 178 /** True to accept non-spec compliant JSON */ 179 private boolean lenient = false; 180 181 /** 182 * Use a manual buffer to easily read and unread upcoming characters, and 183 * also so we can create strings without an intermediate StringBuilder. 184 * We decode literals directly out of this buffer, so it must be at least as 185 * long as the longest token that can be reported as a number. 186 */ 187 private final char[] buffer = new char[1024]; 188 private int pos = 0; 189 private int limit = 0; 190 191 private final List<JsonScope> stack = new ArrayList<JsonScope>(); 192 { 193 push(JsonScope.EMPTY_DOCUMENT); 194 } 195 196 /** 197 * The type of the next token to be returned by {@link #peek} and {@link 198 * #advance}. If null, peek() will assign a value. 199 */ 200 private JsonToken token; 201 202 /** The text of the next name. */ 203 private String name; 204 205 /* 206 * For the next literal value, we may have the text value, or the position 207 * and length in the buffer. 208 */ 209 private String value; 210 private int valuePos; 211 private int valueLength; 212 213 /** True if we're currently handling a skipValue() call. */ 214 private boolean skipping = false; 215 216 /** 217 * Creates a new instance that reads a JSON-encoded stream from {@code in}. 218 */ JsonReader(Reader in)219 public JsonReader(Reader in) { 220 if (in == null) { 221 throw new NullPointerException("in == null"); 222 } 223 this.in = in; 224 } 225 226 /** 227 * Configure this parser to be be liberal in what it accepts. By default, 228 * this parser is strict and only accepts JSON as specified by <a 229 * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the 230 * parser to lenient causes it to ignore the following syntax errors: 231 * 232 * <ul> 233 * <li>End of line comments starting with {@code //} or {@code #} and 234 * ending with a newline character. 235 * <li>C-style comments starting with {@code /*} and ending with 236 * {@code *}{@code /}. Such comments may not be nested. 237 * <li>Names that are unquoted or {@code 'single quoted'}. 238 * <li>Strings that are unquoted or {@code 'single quoted'}. 239 * <li>Array elements separated by {@code ;} instead of {@code ,}. 240 * <li>Unnecessary array separators. These are interpreted as if null 241 * was the omitted value. 242 * <li>Names and values separated by {@code =} or {@code =>} instead of 243 * {@code :}. 244 * <li>Name/value pairs separated by {@code ;} instead of {@code ,}. 245 * </ul> 246 */ setLenient(boolean lenient)247 public void setLenient(boolean lenient) { 248 this.lenient = lenient; 249 } 250 251 /** 252 * Consumes the next token from the JSON stream and asserts that it is the 253 * beginning of a new array. 254 */ beginArray()255 public void beginArray() throws IOException { 256 expect(JsonToken.BEGIN_ARRAY); 257 } 258 259 /** 260 * Consumes the next token from the JSON stream and asserts that it is the 261 * end of the current array. 262 */ endArray()263 public void endArray() throws IOException { 264 expect(JsonToken.END_ARRAY); 265 } 266 267 /** 268 * Consumes the next token from the JSON stream and asserts that it is the 269 * beginning of a new object. 270 */ beginObject()271 public void beginObject() throws IOException { 272 expect(JsonToken.BEGIN_OBJECT); 273 } 274 275 /** 276 * Consumes the next token from the JSON stream and asserts that it is the 277 * end of the current array. 278 */ endObject()279 public void endObject() throws IOException { 280 expect(JsonToken.END_OBJECT); 281 } 282 283 /** 284 * Consumes {@code expected}. 285 */ expect(JsonToken expected)286 private void expect(JsonToken expected) throws IOException { 287 peek(); 288 if (token != expected) { 289 throw new IllegalStateException("Expected " + expected + " but was " + peek()); 290 } 291 advance(); 292 } 293 294 /** 295 * Returns true if the current array or object has another element. 296 */ hasNext()297 public boolean hasNext() throws IOException { 298 peek(); 299 return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY; 300 } 301 302 /** 303 * Returns the type of the next token without consuming it. 304 */ peek()305 public JsonToken peek() throws IOException { 306 if (token != null) { 307 return token; 308 } 309 310 switch (peekStack()) { 311 case EMPTY_DOCUMENT: 312 replaceTop(JsonScope.NONEMPTY_DOCUMENT); 313 JsonToken firstToken = nextValue(); 314 if (token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) { 315 throw new IOException( 316 "Expected JSON document to start with '[' or '{' but was " + token); 317 } 318 return firstToken; 319 case EMPTY_ARRAY: 320 return nextInArray(true); 321 case NONEMPTY_ARRAY: 322 return nextInArray(false); 323 case EMPTY_OBJECT: 324 return nextInObject(true); 325 case DANGLING_NAME: 326 return objectValue(); 327 case NONEMPTY_OBJECT: 328 return nextInObject(false); 329 case NONEMPTY_DOCUMENT: 330 return token = JsonToken.END_DOCUMENT; 331 case CLOSED: 332 throw new IllegalStateException("JsonReader is closed"); 333 default: 334 throw new AssertionError(); 335 } 336 } 337 338 /** 339 * Advances the cursor in the JSON stream to the next token. 340 */ advance()341 private JsonToken advance() throws IOException { 342 peek(); 343 344 JsonToken result = token; 345 token = null; 346 value = null; 347 name = null; 348 return result; 349 } 350 351 /** 352 * Returns the next token, a {@link JsonToken#NAME property name}, and 353 * consumes it. 354 * 355 * @throws IOException if the next token in the stream is not a property 356 * name. 357 */ nextName()358 public String nextName() throws IOException { 359 peek(); 360 if (token != JsonToken.NAME) { 361 throw new IllegalStateException("Expected a name but was " + peek()); 362 } 363 String result = name; 364 advance(); 365 return result; 366 } 367 368 /** 369 * Returns the {@link JsonToken#STRING string} value of the next token, 370 * consuming it. If the next token is a number, this method will return its 371 * string form. 372 * 373 * @throws IllegalStateException if the next token is not a string or if 374 * this reader is closed. 375 */ nextString()376 public String nextString() throws IOException { 377 peek(); 378 if (token != JsonToken.STRING && token != JsonToken.NUMBER) { 379 throw new IllegalStateException("Expected a string but was " + peek()); 380 } 381 382 String result = value; 383 advance(); 384 return result; 385 } 386 387 /** 388 * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token, 389 * consuming it. 390 * 391 * @throws IllegalStateException if the next token is not a boolean or if 392 * this reader is closed. 393 */ nextBoolean()394 public boolean nextBoolean() throws IOException { 395 peek(); 396 if (token != JsonToken.BOOLEAN) { 397 throw new IllegalStateException("Expected a boolean but was " + token); 398 } 399 400 boolean result = (value == TRUE); 401 advance(); 402 return result; 403 } 404 405 /** 406 * Consumes the next token from the JSON stream and asserts that it is a 407 * literal null. 408 * 409 * @throws IllegalStateException if the next token is not null or if this 410 * reader is closed. 411 */ nextNull()412 public void nextNull() throws IOException { 413 peek(); 414 if (token != JsonToken.NULL) { 415 throw new IllegalStateException("Expected null but was " + token); 416 } 417 418 advance(); 419 } 420 421 /** 422 * Returns the {@link JsonToken#NUMBER double} value of the next token, 423 * consuming it. If the next token is a string, this method will attempt to 424 * parse it as a double using {@link Double#parseDouble(String)}. 425 * 426 * @throws IllegalStateException if the next token is not a literal value. 427 */ nextDouble()428 public double nextDouble() throws IOException { 429 peek(); 430 if (token != JsonToken.STRING && token != JsonToken.NUMBER) { 431 throw new IllegalStateException("Expected a double but was " + token); 432 } 433 434 double result = Double.parseDouble(value); 435 advance(); 436 return result; 437 } 438 439 /** 440 * Returns the {@link JsonToken#NUMBER long} value of the next token, 441 * consuming it. If the next token is a string, this method will attempt to 442 * parse it as a long. If the next token's numeric value cannot be exactly 443 * represented by a Java {@code long}, this method throws. 444 * 445 * @throws IllegalStateException if the next token is not a literal value. 446 * @throws NumberFormatException if the next literal value cannot be parsed 447 * as a number, or exactly represented as a long. 448 */ nextLong()449 public long nextLong() throws IOException { 450 peek(); 451 if (token != JsonToken.STRING && token != JsonToken.NUMBER) { 452 throw new IllegalStateException("Expected a long but was " + token); 453 } 454 455 long result; 456 try { 457 result = Long.parseLong(value); 458 } catch (NumberFormatException ignored) { 459 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException 460 result = (long) asDouble; 461 if ((double) result != asDouble) { 462 throw new NumberFormatException(value); 463 } 464 } 465 466 advance(); 467 return result; 468 } 469 470 /** 471 * Returns the {@link JsonToken#NUMBER int} value of the next token, 472 * consuming it. If the next token is a string, this method will attempt to 473 * parse it as an int. If the next token's numeric value cannot be exactly 474 * represented by a Java {@code int}, this method throws. 475 * 476 * @throws IllegalStateException if the next token is not a literal value. 477 * @throws NumberFormatException if the next literal value cannot be parsed 478 * as a number, or exactly represented as an int. 479 */ nextInt()480 public int nextInt() throws IOException { 481 peek(); 482 if (token != JsonToken.STRING && token != JsonToken.NUMBER) { 483 throw new IllegalStateException("Expected an int but was " + token); 484 } 485 486 int result; 487 try { 488 result = Integer.parseInt(value); 489 } catch (NumberFormatException ignored) { 490 double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException 491 result = (int) asDouble; 492 if ((double) result != asDouble) { 493 throw new NumberFormatException(value); 494 } 495 } 496 497 advance(); 498 return result; 499 } 500 501 /** 502 * Closes this JSON reader and the underlying {@link Reader}. 503 */ close()504 public void close() throws IOException { 505 value = null; 506 token = null; 507 stack.clear(); 508 stack.add(JsonScope.CLOSED); 509 in.close(); 510 } 511 512 /** 513 * Skips the next value recursively. If it is an object or array, all nested 514 * elements are skipped. This method is intended for use when the JSON token 515 * stream contains unrecognized or unhandled values. 516 */ skipValue()517 public void skipValue() throws IOException { 518 skipping = true; 519 try { 520 int count = 0; 521 do { 522 JsonToken token = advance(); 523 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) { 524 count++; 525 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) { 526 count--; 527 } 528 } while (count != 0); 529 } finally { 530 skipping = false; 531 } 532 } 533 peekStack()534 private JsonScope peekStack() { 535 return stack.get(stack.size() - 1); 536 } 537 pop()538 private JsonScope pop() { 539 return stack.remove(stack.size() - 1); 540 } 541 push(JsonScope newTop)542 private void push(JsonScope newTop) { 543 stack.add(newTop); 544 } 545 546 /** 547 * Replace the value on the top of the stack with the given value. 548 */ replaceTop(JsonScope newTop)549 private void replaceTop(JsonScope newTop) { 550 stack.set(stack.size() - 1, newTop); 551 } 552 nextInArray(boolean firstElement)553 private JsonToken nextInArray(boolean firstElement) throws IOException { 554 if (firstElement) { 555 replaceTop(JsonScope.NONEMPTY_ARRAY); 556 } else { 557 /* Look for a comma before each element after the first element. */ 558 switch (nextNonWhitespace()) { 559 case ']': 560 pop(); 561 return token = JsonToken.END_ARRAY; 562 case ';': 563 checkLenient(); // fall-through 564 case ',': 565 break; 566 default: 567 throw syntaxError("Unterminated array"); 568 } 569 } 570 571 switch (nextNonWhitespace()) { 572 case ']': 573 if (firstElement) { 574 pop(); 575 return token = JsonToken.END_ARRAY; 576 } 577 // fall-through to handle ",]" 578 case ';': 579 case ',': 580 /* In lenient mode, a 0-length literal means 'null' */ 581 checkLenient(); 582 pos--; 583 value = "null"; 584 return token = JsonToken.NULL; 585 default: 586 pos--; 587 return nextValue(); 588 } 589 } 590 nextInObject(boolean firstElement)591 private JsonToken nextInObject(boolean firstElement) throws IOException { 592 /* 593 * Read delimiters. Either a comma/semicolon separating this and the 594 * previous name-value pair, or a close brace to denote the end of the 595 * object. 596 */ 597 if (firstElement) { 598 /* Peek to see if this is the empty object. */ 599 switch (nextNonWhitespace()) { 600 case '}': 601 pop(); 602 return token = JsonToken.END_OBJECT; 603 default: 604 pos--; 605 } 606 } else { 607 switch (nextNonWhitespace()) { 608 case '}': 609 pop(); 610 return token = JsonToken.END_OBJECT; 611 case ';': 612 case ',': 613 break; 614 default: 615 throw syntaxError("Unterminated object"); 616 } 617 } 618 619 /* Read the name. */ 620 int quote = nextNonWhitespace(); 621 switch (quote) { 622 case '\'': 623 checkLenient(); // fall-through 624 case '"': 625 name = nextString((char) quote); 626 break; 627 default: 628 checkLenient(); 629 pos--; 630 name = nextLiteral(false); 631 if (name.isEmpty()) { 632 throw syntaxError("Expected name"); 633 } 634 } 635 636 replaceTop(JsonScope.DANGLING_NAME); 637 return token = JsonToken.NAME; 638 } 639 objectValue()640 private JsonToken objectValue() throws IOException { 641 /* 642 * Read the name/value separator. Usually a colon ':'. In lenient mode 643 * we also accept an equals sign '=', or an arrow "=>". 644 */ 645 switch (nextNonWhitespace()) { 646 case ':': 647 break; 648 case '=': 649 checkLenient(); 650 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') { 651 pos++; 652 } 653 break; 654 default: 655 throw syntaxError("Expected ':'"); 656 } 657 658 replaceTop(JsonScope.NONEMPTY_OBJECT); 659 return nextValue(); 660 } 661 nextValue()662 private JsonToken nextValue() throws IOException { 663 int c = nextNonWhitespace(); 664 switch (c) { 665 case '{': 666 push(JsonScope.EMPTY_OBJECT); 667 return token = JsonToken.BEGIN_OBJECT; 668 669 case '[': 670 push(JsonScope.EMPTY_ARRAY); 671 return token = JsonToken.BEGIN_ARRAY; 672 673 case '\'': 674 checkLenient(); // fall-through 675 case '"': 676 value = nextString((char) c); 677 return token = JsonToken.STRING; 678 679 default: 680 pos--; 681 return readLiteral(); 682 } 683 } 684 685 /** 686 * Returns true once {@code limit - pos >= minimum}. If the data is 687 * exhausted before that many characters are available, this returns 688 * false. 689 */ fillBuffer(int minimum)690 private boolean fillBuffer(int minimum) throws IOException { 691 if (limit != pos) { 692 limit -= pos; 693 System.arraycopy(buffer, pos, buffer, 0, limit); 694 } else { 695 limit = 0; 696 } 697 698 pos = 0; 699 int total; 700 while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) { 701 limit += total; 702 if (limit >= minimum) { 703 return true; 704 } 705 } 706 return false; 707 } 708 nextNonWhitespace()709 private int nextNonWhitespace() throws IOException { 710 while (pos < limit || fillBuffer(1)) { 711 int c = buffer[pos++]; 712 switch (c) { 713 case '\t': 714 case ' ': 715 case '\n': 716 case '\r': 717 continue; 718 719 case '/': 720 if (pos == limit && !fillBuffer(1)) { 721 return c; 722 } 723 724 checkLenient(); 725 char peek = buffer[pos]; 726 switch (peek) { 727 case '*': 728 // skip a /* c-style comment */ 729 pos++; 730 if (!skipTo("*/")) { 731 throw syntaxError("Unterminated comment"); 732 } 733 pos += 2; 734 continue; 735 736 case '/': 737 // skip a // end-of-line comment 738 pos++; 739 skipToEndOfLine(); 740 continue; 741 742 default: 743 return c; 744 } 745 746 case '#': 747 /* 748 * Skip a # hash end-of-line comment. The JSON RFC doesn't 749 * specify this behaviour, but it's required to parse 750 * existing documents. See http://b/2571423. 751 */ 752 checkLenient(); 753 skipToEndOfLine(); 754 continue; 755 756 default: 757 return c; 758 } 759 } 760 761 throw syntaxError("End of input"); 762 } 763 checkLenient()764 private void checkLenient() throws IOException { 765 if (!lenient) { 766 throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON"); 767 } 768 } 769 770 /** 771 * Advances the position until after the next newline character. If the line 772 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the 773 * caller. 774 */ skipToEndOfLine()775 private void skipToEndOfLine() throws IOException { 776 while (pos < limit || fillBuffer(1)) { 777 char c = buffer[pos++]; 778 if (c == '\r' || c == '\n') { 779 break; 780 } 781 } 782 } 783 skipTo(String toFind)784 private boolean skipTo(String toFind) throws IOException { 785 outer: 786 for (; pos + toFind.length() < limit || fillBuffer(toFind.length()); pos++) { 787 for (int c = 0; c < toFind.length(); c++) { 788 if (buffer[pos + c] != toFind.charAt(c)) { 789 continue outer; 790 } 791 } 792 return true; 793 } 794 return false; 795 } 796 797 /** 798 * Returns the string up to but not including {@code quote}, unescaping any 799 * character escape sequences encountered along the way. The opening quote 800 * should have already been read. This consumes the closing quote, but does 801 * not include it in the returned string. 802 * 803 * @param quote either ' or ". 804 * @throws NumberFormatException if any unicode escape sequences are 805 * malformed. 806 */ nextString(char quote)807 private String nextString(char quote) throws IOException { 808 StringBuilder builder = null; 809 do { 810 /* the index of the first character not yet appended to the builder. */ 811 int start = pos; 812 while (pos < limit) { 813 int c = buffer[pos++]; 814 815 if (c == quote) { 816 if (skipping) { 817 return "skipped!"; 818 } else if (builder == null) { 819 return new String(buffer, start, pos - start - 1); 820 } else { 821 builder.append(buffer, start, pos - start - 1); 822 return builder.toString(); 823 } 824 825 } else if (c == '\\') { 826 if (builder == null) { 827 builder = new StringBuilder(); 828 } 829 builder.append(buffer, start, pos - start - 1); 830 builder.append(readEscapeCharacter()); 831 start = pos; 832 } 833 } 834 835 if (builder == null) { 836 builder = new StringBuilder(); 837 } 838 builder.append(buffer, start, pos - start); 839 } while (fillBuffer(1)); 840 841 throw syntaxError("Unterminated string"); 842 } 843 844 /** 845 * Reads the value up to but not including any delimiter characters. This 846 * does not consume the delimiter character. 847 * 848 * @param assignOffsetsOnly true for this method to only set the valuePos 849 * and valueLength fields and return a null result. This only works if 850 * the literal is short; a string is returned otherwise. 851 */ nextLiteral(boolean assignOffsetsOnly)852 private String nextLiteral(boolean assignOffsetsOnly) throws IOException { 853 StringBuilder builder = null; 854 valuePos = -1; 855 valueLength = 0; 856 int i = 0; 857 858 findNonLiteralCharacter: 859 while (true) { 860 for (; pos + i < limit; i++) { 861 switch (buffer[pos + i]) { 862 case '/': 863 case '\\': 864 case ';': 865 case '#': 866 case '=': 867 checkLenient(); // fall-through 868 case '{': 869 case '}': 870 case '[': 871 case ']': 872 case ':': 873 case ',': 874 case ' ': 875 case '\t': 876 case '\f': 877 case '\r': 878 case '\n': 879 break findNonLiteralCharacter; 880 } 881 } 882 883 /* 884 * Attempt to load the entire literal into the buffer at once. If 885 * we run out of input, add a non-literal character at the end so 886 * that decoding doesn't need to do bounds checks. 887 */ 888 if (i < buffer.length) { 889 if (fillBuffer(i + 1)) { 890 continue; 891 } else { 892 buffer[limit] = '\0'; 893 break; 894 } 895 } 896 897 // use a StringBuilder when the value is too long. It must be an unquoted string. 898 if (builder == null) { 899 builder = new StringBuilder(); 900 } 901 builder.append(buffer, pos, i); 902 valueLength += i; 903 pos += i; 904 i = 0; 905 if (!fillBuffer(1)) { 906 break; 907 } 908 } 909 910 String result; 911 if (assignOffsetsOnly && builder == null) { 912 valuePos = pos; 913 result = null; 914 } else if (skipping) { 915 result = "skipped!"; 916 } else if (builder == null) { 917 result = new String(buffer, pos, i); 918 } else { 919 builder.append(buffer, pos, i); 920 result = builder.toString(); 921 } 922 valueLength += i; 923 pos += i; 924 return result; 925 } 926 toString()927 @Override public String toString() { 928 return getClass().getSimpleName() + " near " + getSnippet(); 929 } 930 931 /** 932 * Unescapes the character identified by the character or characters that 933 * immediately follow a backslash. The backslash '\' should have already 934 * been read. This supports both unicode escapes "u000A" and two-character 935 * escapes "\n". 936 * 937 * @throws NumberFormatException if any unicode escape sequences are 938 * malformed. 939 */ readEscapeCharacter()940 private char readEscapeCharacter() throws IOException { 941 if (pos == limit && !fillBuffer(1)) { 942 throw syntaxError("Unterminated escape sequence"); 943 } 944 945 char escaped = buffer[pos++]; 946 switch (escaped) { 947 case 'u': 948 if (pos + 4 > limit && !fillBuffer(4)) { 949 throw syntaxError("Unterminated escape sequence"); 950 } 951 String hex = new String(buffer, pos, 4); 952 pos += 4; 953 return (char) Integer.parseInt(hex, 16); 954 955 case 't': 956 return '\t'; 957 958 case 'b': 959 return '\b'; 960 961 case 'n': 962 return '\n'; 963 964 case 'r': 965 return '\r'; 966 967 case 'f': 968 return '\f'; 969 970 case '\'': 971 case '"': 972 case '\\': 973 default: 974 return escaped; 975 } 976 } 977 978 /** 979 * Reads a null, boolean, numeric or unquoted string literal value. 980 */ readLiteral()981 private JsonToken readLiteral() throws IOException { 982 value = nextLiteral(true); 983 if (valueLength == 0) { 984 throw syntaxError("Expected literal value"); 985 } 986 token = decodeLiteral(); 987 if (token == JsonToken.STRING) { 988 checkLenient(); 989 } 990 return token; 991 } 992 993 /** 994 * Assigns {@code nextToken} based on the value of {@code nextValue}. 995 */ decodeLiteral()996 private JsonToken decodeLiteral() throws IOException { 997 if (valuePos == -1) { 998 // it was too long to fit in the buffer so it can only be a string 999 return JsonToken.STRING; 1000 } else if (valueLength == 4 1001 && ('n' == buffer[valuePos ] || 'N' == buffer[valuePos ]) 1002 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1]) 1003 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2]) 1004 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) { 1005 value = "null"; 1006 return JsonToken.NULL; 1007 } else if (valueLength == 4 1008 && ('t' == buffer[valuePos ] || 'T' == buffer[valuePos ]) 1009 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1]) 1010 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2]) 1011 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) { 1012 value = TRUE; 1013 return JsonToken.BOOLEAN; 1014 } else if (valueLength == 5 1015 && ('f' == buffer[valuePos ] || 'F' == buffer[valuePos ]) 1016 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1]) 1017 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2]) 1018 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3]) 1019 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) { 1020 value = FALSE; 1021 return JsonToken.BOOLEAN; 1022 } else { 1023 value = new String(buffer, valuePos, valueLength); 1024 return decodeNumber(buffer, valuePos, valueLength); 1025 } 1026 } 1027 1028 /** 1029 * Determine whether the characters is a JSON number. Numbers are of the 1030 * form -12.34e+56. Fractional and exponential parts are optional. Leading 1031 * zeroes are not allowed in the value or exponential part, but are allowed 1032 * in the fraction. 1033 * 1034 * <p>This has a side effect of setting isInteger. 1035 */ decodeNumber(char[] chars, int offset, int length)1036 private JsonToken decodeNumber(char[] chars, int offset, int length) { 1037 int i = offset; 1038 int c = chars[i]; 1039 1040 if (c == '-') { 1041 c = chars[++i]; 1042 } 1043 1044 if (c == '0') { 1045 c = chars[++i]; 1046 } else if (c >= '1' && c <= '9') { 1047 c = chars[++i]; 1048 while (c >= '0' && c <= '9') { 1049 c = chars[++i]; 1050 } 1051 } else { 1052 return JsonToken.STRING; 1053 } 1054 1055 if (c == '.') { 1056 c = chars[++i]; 1057 while (c >= '0' && c <= '9') { 1058 c = chars[++i]; 1059 } 1060 } 1061 1062 if (c == 'e' || c == 'E') { 1063 c = chars[++i]; 1064 if (c == '+' || c == '-') { 1065 c = chars[++i]; 1066 } 1067 if (c >= '0' && c <= '9') { 1068 c = chars[++i]; 1069 while (c >= '0' && c <= '9') { 1070 c = chars[++i]; 1071 } 1072 } else { 1073 return JsonToken.STRING; 1074 } 1075 } 1076 1077 if (i == offset + length) { 1078 return JsonToken.NUMBER; 1079 } else { 1080 return JsonToken.STRING; 1081 } 1082 } 1083 1084 /** 1085 * Throws a new IO exception with the given message and a context snippet 1086 * with this reader's content. 1087 */ syntaxError(String message)1088 public IOException syntaxError(String message) throws IOException { 1089 throw new JsonSyntaxException(message + " near " + getSnippet()); 1090 } 1091 getSnippet()1092 private CharSequence getSnippet() { 1093 StringBuilder snippet = new StringBuilder(); 1094 int beforePos = Math.min(pos, 20); 1095 snippet.append(buffer, pos - beforePos, beforePos); 1096 int afterPos = Math.min(limit - pos, 20); 1097 snippet.append(buffer, pos, afterPos); 1098 return snippet; 1099 } 1100 1101 private static class JsonSyntaxException extends IOException { JsonSyntaxException(String s)1102 private JsonSyntaxException(String s) { 1103 super(s); 1104 } 1105 } 1106 } 1107