1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.util;
18 
19 import java.io.Closeable;
20 import java.io.EOFException;
21 import java.io.IOException;
22 import java.io.Reader;
23 import java.util.ArrayList;
24 import java.util.List;
25 import libcore.internal.StringPool;
26 
27 /**
28  * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
29  * encoded value as a stream of tokens. This stream includes both literal
30  * values (strings, numbers, booleans, and nulls) as well as the begin and
31  * end delimiters of objects and arrays. The tokens are traversed in
32  * depth-first order, the same order that they appear in the JSON document.
33  * Within JSON objects, name/value pairs are represented by a single token.
34  *
35  * <h3>Parsing JSON</h3>
36  * To create a recursive descent parser for your own JSON streams, first create
37  * an entry point method that creates a {@code JsonReader}.
38  *
39  * <p>Next, create handler methods for each structure in your JSON text. You'll
40  * need a method for each object type and for each array type.
41  * <ul>
42  *   <li>Within <strong>array handling</strong> methods, first call {@link
43  *       #beginArray} to consume the array's opening bracket. Then create a
44  *       while loop that accumulates values, terminating when {@link #hasNext}
45  *       is false. Finally, read the array's closing bracket by calling {@link
46  *       #endArray}.
47  *   <li>Within <strong>object handling</strong> methods, first call {@link
48  *       #beginObject} to consume the object's opening brace. Then create a
49  *       while loop that assigns values to local variables based on their name.
50  *       This loop should terminate when {@link #hasNext} is false. Finally,
51  *       read the object's closing brace by calling {@link #endObject}.
52  * </ul>
53  * <p>When a nested object or array is encountered, delegate to the
54  * corresponding handler method.
55  *
56  * <p>When an unknown name is encountered, strict parsers should fail with an
57  * exception. Lenient parsers should call {@link #skipValue()} to recursively
58  * skip the value's nested tokens, which may otherwise conflict.
59  *
60  * <p>If a value may be null, you should first check using {@link #peek()}.
61  * Null literals can be consumed using either {@link #nextNull()} or {@link
62  * #skipValue()}.
63  *
64  * <h3>Example</h3>
65  * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
66  * [
67  *   {
68  *     "id": 912345678901,
69  *     "text": "How do I read JSON on Android?",
70  *     "geo": null,
71  *     "user": {
72  *       "name": "android_newb",
73  *       "followers_count": 41
74  *      }
75  *   },
76  *   {
77  *     "id": 912345678902,
78  *     "text": "@android_newb just use android.util.JsonReader!",
79  *     "geo": [50.454722, -104.606667],
80  *     "user": {
81  *       "name": "jesse",
82  *       "followers_count": 2
83  *     }
84  *   }
85  * ]}</pre>
86  * This code implements the parser for the above structure: <pre>   {@code
87  *
88  *   public List<Message> readJsonStream(InputStream in) throws IOException {
89  *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
90  *     try {
91  *       return readMessagesArray(reader);
92  *     } finally {
93  *       reader.close();
94  *     }
95  *   }
96  *
97  *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
98  *     List<Message> messages = new ArrayList<Message>();
99  *
100  *     reader.beginArray();
101  *     while (reader.hasNext()) {
102  *       messages.add(readMessage(reader));
103  *     }
104  *     reader.endArray();
105  *     return messages;
106  *   }
107  *
108  *   public Message readMessage(JsonReader reader) throws IOException {
109  *     long id = -1;
110  *     String text = null;
111  *     User user = null;
112  *     List<Double> geo = null;
113  *
114  *     reader.beginObject();
115  *     while (reader.hasNext()) {
116  *       String name = reader.nextName();
117  *       if (name.equals("id")) {
118  *         id = reader.nextLong();
119  *       } else if (name.equals("text")) {
120  *         text = reader.nextString();
121  *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
122  *         geo = readDoublesArray(reader);
123  *       } else if (name.equals("user")) {
124  *         user = readUser(reader);
125  *       } else {
126  *         reader.skipValue();
127  *       }
128  *     }
129  *     reader.endObject();
130  *     return new Message(id, text, user, geo);
131  *   }
132  *
133  *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
134  *     List<Double> doubles = new ArrayList<Double>();
135  *
136  *     reader.beginArray();
137  *     while (reader.hasNext()) {
138  *       doubles.add(reader.nextDouble());
139  *     }
140  *     reader.endArray();
141  *     return doubles;
142  *   }
143  *
144  *   public User readUser(JsonReader reader) throws IOException {
145  *     String username = null;
146  *     int followersCount = -1;
147  *
148  *     reader.beginObject();
149  *     while (reader.hasNext()) {
150  *       String name = reader.nextName();
151  *       if (name.equals("name")) {
152  *         username = reader.nextString();
153  *       } else if (name.equals("followers_count")) {
154  *         followersCount = reader.nextInt();
155  *       } else {
156  *         reader.skipValue();
157  *       }
158  *     }
159  *     reader.endObject();
160  *     return new User(username, followersCount);
161  *   }}</pre>
162  *
163  * <h3>Number Handling</h3>
164  * This reader permits numeric values to be read as strings and string values to
165  * be read as numbers. For example, both elements of the JSON array {@code
166  * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
167  * This behavior is intended to prevent lossy numeric conversions: double is
168  * JavaScript's only numeric type and very large values like {@code
169  * 9007199254740993} cannot be represented exactly on that platform. To minimize
170  * precision loss, extremely large values should be written and read as strings
171  * in JSON.
172  *
173  * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
174  * of this class are not thread safe.
175  */
176 public final class JsonReader implements Closeable {
177 
    /*
     * Text of the boolean literals. nextBoolean() decides its result by
     * comparing the pending value against TRUE.
     */
    private static final String TRUE = "true";
    private static final String FALSE = "false";

    /** Pool consulted by nextString(char) when a string needs no unescaping. */
    private final StringPool stringPool = new StringPool();

    /** The input JSON. Never null; the constructor rejects a null reader. */
    private final Reader in;

    /** True to accept non-spec compliant JSON */
    private boolean lenient = false;

    /**
     * Use a manual buffer to easily read and unread upcoming characters, and
     * also so we can create strings without an intermediate StringBuilder.
     * We decode literals directly out of this buffer, so it must be at least as
     * long as the longest token that can be reported as a number.
     *
     * <p>{@code pos} is the index of the next character to read and
     * {@code limit} is the end of the valid data; characters are available
     * while {@code pos < limit}.
     */
    private final char[] buffer = new char[1024];
    private int pos = 0;
    private int limit = 0;

    /*
     * The line and column of the first character in the buffer. fillBuffer()
     * advances these past the already-consumed characters before it discards
     * them, so getLineNumber() and getColumnNumber() only need to scan the
     * current buffer contents.
     */
    private int bufferStartLine = 1;
    private int bufferStartColumn = 1;

    /*
     * The stack of enclosing scopes; peek() dispatches on the top entry. The
     * instance initializer below seeds it with EMPTY_DOCUMENT, and close()
     * replaces the contents with a single CLOSED entry.
     */
    private final List<JsonScope> stack = new ArrayList<JsonScope>();
    {
        push(JsonScope.EMPTY_DOCUMENT);
    }

    /**
     * The type of the next token to be returned by {@link #peek} and {@link
     * #advance}. If null, peek() will assign a value.
     */
    private JsonToken token;

    /** The text of the next name. Cleared by advance(). */
    private String name;

    /*
     * For the next literal value, we may have the text value, or the position
     * and length in the buffer.
     */
    private String value;
    private int valuePos;
    private int valueLength;

    /**
     * True if we're currently handling a skipValue() call. While set,
     * nextString(char) returns a placeholder instead of building the string.
     */
    private boolean skipping = false;
229 
230     /**
231      * Creates a new instance that reads a JSON-encoded stream from {@code in}.
232      */
JsonReader(Reader in)233     public JsonReader(Reader in) {
234         if (in == null) {
235             throw new NullPointerException("in == null");
236         }
237         this.in = in;
238     }
239 
240     /**
     * Configure this parser to be liberal in what it accepts. By default,
242      * this parser is strict and only accepts JSON as specified by <a
243      * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
244      * parser to lenient causes it to ignore the following syntax errors:
245      *
246      * <ul>
247      *   <li>End of line comments starting with {@code //} or {@code #} and
248      *       ending with a newline character.
249      *   <li>C-style comments starting with {@code /*} and ending with
250      *       {@code *}{@code /}. Such comments may not be nested.
251      *   <li>Names that are unquoted or {@code 'single quoted'}.
252      *   <li>Strings that are unquoted or {@code 'single quoted'}.
253      *   <li>Array elements separated by {@code ;} instead of {@code ,}.
254      *   <li>Unnecessary array separators. These are interpreted as if null
255      *       was the omitted value.
256      *   <li>Names and values separated by {@code =} or {@code =>} instead of
257      *       {@code :}.
258      *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
259      * </ul>
260      */
setLenient(boolean lenient)261     public void setLenient(boolean lenient) {
262         this.lenient = lenient;
263     }
264 
265     /**
266      * Returns true if this parser is liberal in what it accepts.
267      */
isLenient()268     public boolean isLenient() {
269         return lenient;
270     }
271 
272     /**
273      * Consumes the next token from the JSON stream and asserts that it is the
274      * beginning of a new array.
275      */
beginArray()276     public void beginArray() throws IOException {
277         expect(JsonToken.BEGIN_ARRAY);
278     }
279 
280     /**
281      * Consumes the next token from the JSON stream and asserts that it is the
282      * end of the current array.
283      */
endArray()284     public void endArray() throws IOException {
285         expect(JsonToken.END_ARRAY);
286     }
287 
288     /**
289      * Consumes the next token from the JSON stream and asserts that it is the
290      * beginning of a new object.
291      */
beginObject()292     public void beginObject() throws IOException {
293         expect(JsonToken.BEGIN_OBJECT);
294     }
295 
296     /**
297      * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current object.
299      */
endObject()300     public void endObject() throws IOException {
301         expect(JsonToken.END_OBJECT);
302     }
303 
304     /**
305      * Consumes {@code expected}.
306      */
expect(JsonToken expected)307     private void expect(JsonToken expected) throws IOException {
308         peek();
309         if (token != expected) {
310             throw new IllegalStateException("Expected " + expected + " but was " + peek());
311         }
312         advance();
313     }
314 
315     /**
316      * Returns true if the current array or object has another element.
317      */
hasNext()318     public boolean hasNext() throws IOException {
319         peek();
320         return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
321     }
322 
323     /**
324      * Returns the type of the next token without consuming it.
325      */
peek()326     public JsonToken peek() throws IOException {
327         if (token != null) {
328           return token;
329         }
330 
331         switch (peekStack()) {
332             case EMPTY_DOCUMENT:
333                 replaceTop(JsonScope.NONEMPTY_DOCUMENT);
334                 JsonToken firstToken = nextValue();
335                 if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
336                     throw new IOException(
337                             "Expected JSON document to start with '[' or '{' but was " + token);
338                 }
339                 return firstToken;
340             case EMPTY_ARRAY:
341                 return nextInArray(true);
342             case NONEMPTY_ARRAY:
343                 return nextInArray(false);
344             case EMPTY_OBJECT:
345                 return nextInObject(true);
346             case DANGLING_NAME:
347                 return objectValue();
348             case NONEMPTY_OBJECT:
349                 return nextInObject(false);
350             case NONEMPTY_DOCUMENT:
351                 try {
352                     JsonToken token = nextValue();
353                     if (lenient) {
354                         return token;
355                     }
356                     throw syntaxError("Expected EOF");
357                 } catch (EOFException e) {
358                     return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
359                 }
360             case CLOSED:
361                 throw new IllegalStateException("JsonReader is closed");
362             default:
363                 throw new AssertionError();
364         }
365     }
366 
367     /**
368      * Advances the cursor in the JSON stream to the next token.
369      */
advance()370     private JsonToken advance() throws IOException {
371         peek();
372 
373         JsonToken result = token;
374         token = null;
375         value = null;
376         name = null;
377         return result;
378     }
379 
380     /**
381      * Returns the next token, a {@link JsonToken#NAME property name}, and
382      * consumes it.
383      *
     * @throws IllegalStateException if the next token in the stream is not a
     *     property name.
386      */
nextName()387     public String nextName() throws IOException {
388         peek();
389         if (token != JsonToken.NAME) {
390             throw new IllegalStateException("Expected a name but was " + peek());
391         }
392         String result = name;
393         advance();
394         return result;
395     }
396 
397     /**
398      * Returns the {@link JsonToken#STRING string} value of the next token,
399      * consuming it. If the next token is a number, this method will return its
400      * string form.
401      *
402      * @throws IllegalStateException if the next token is not a string or if
403      *     this reader is closed.
404      */
nextString()405     public String nextString() throws IOException {
406         peek();
407         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
408             throw new IllegalStateException("Expected a string but was " + peek());
409         }
410 
411         String result = value;
412         advance();
413         return result;
414     }
415 
416     /**
417      * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
418      * consuming it.
419      *
420      * @throws IllegalStateException if the next token is not a boolean or if
421      *     this reader is closed.
422      */
nextBoolean()423     public boolean nextBoolean() throws IOException {
424         peek();
425         if (token != JsonToken.BOOLEAN) {
426             throw new IllegalStateException("Expected a boolean but was " + token);
427         }
428 
429         boolean result = (value == TRUE);
430         advance();
431         return result;
432     }
433 
434     /**
435      * Consumes the next token from the JSON stream and asserts that it is a
436      * literal null.
437      *
438      * @throws IllegalStateException if the next token is not null or if this
439      *     reader is closed.
440      */
nextNull()441     public void nextNull() throws IOException {
442         peek();
443         if (token != JsonToken.NULL) {
444             throw new IllegalStateException("Expected null but was " + token);
445         }
446 
447         advance();
448     }
449 
450     /**
451      * Returns the {@link JsonToken#NUMBER double} value of the next token,
452      * consuming it. If the next token is a string, this method will attempt to
453      * parse it as a double using {@link Double#parseDouble(String)}.
454      *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a double.
456      */
nextDouble()457     public double nextDouble() throws IOException {
458         peek();
459         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
460             throw new IllegalStateException("Expected a double but was " + token);
461         }
462 
463         double result = Double.parseDouble(value);
464         advance();
465         return result;
466     }
467 
468     /**
469      * Returns the {@link JsonToken#NUMBER long} value of the next token,
470      * consuming it. If the next token is a string, this method will attempt to
471      * parse it as a long. If the next token's numeric value cannot be exactly
472      * represented by a Java {@code long}, this method throws.
473      *
474      * @throws IllegalStateException if the next token is not a literal value.
475      * @throws NumberFormatException if the next literal value cannot be parsed
476      *     as a number, or exactly represented as a long.
477      */
nextLong()478     public long nextLong() throws IOException {
479         peek();
480         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
481             throw new IllegalStateException("Expected a long but was " + token);
482         }
483 
484         long result;
485         try {
486             result = Long.parseLong(value);
487         } catch (NumberFormatException ignored) {
488             double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
489             result = (long) asDouble;
490             if ((double) result != asDouble) {
491                 throw new NumberFormatException(value);
492             }
493         }
494 
495         advance();
496         return result;
497     }
498 
499     /**
500      * Returns the {@link JsonToken#NUMBER int} value of the next token,
501      * consuming it. If the next token is a string, this method will attempt to
502      * parse it as an int. If the next token's numeric value cannot be exactly
503      * represented by a Java {@code int}, this method throws.
504      *
505      * @throws IllegalStateException if the next token is not a literal value.
506      * @throws NumberFormatException if the next literal value cannot be parsed
507      *     as a number, or exactly represented as an int.
508      */
nextInt()509     public int nextInt() throws IOException {
510         peek();
511         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
512             throw new IllegalStateException("Expected an int but was " + token);
513         }
514 
515         int result;
516         try {
517             result = Integer.parseInt(value);
518         } catch (NumberFormatException ignored) {
519             double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
520             result = (int) asDouble;
521             if ((double) result != asDouble) {
522                 throw new NumberFormatException(value);
523             }
524         }
525 
526         advance();
527         return result;
528     }
529 
530     /**
531      * Closes this JSON reader and the underlying {@link Reader}.
532      */
close()533     public void close() throws IOException {
534         value = null;
535         token = null;
536         stack.clear();
537         stack.add(JsonScope.CLOSED);
538         in.close();
539     }
540 
541     /**
542      * Skips the next value recursively. If it is an object or array, all nested
543      * elements are skipped. This method is intended for use when the JSON token
544      * stream contains unrecognized or unhandled values.
545      */
skipValue()546     public void skipValue() throws IOException {
547         skipping = true;
548         try {
549             if (!hasNext() || peek() == JsonToken.END_DOCUMENT) {
550                 throw new IllegalStateException("No element left to skip");
551             }
552             int count = 0;
553             do {
554                 JsonToken token = advance();
555                 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
556                     count++;
557                 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
558                     count--;
559                 }
560             } while (count != 0);
561         } finally {
562             skipping = false;
563         }
564     }
565 
peekStack()566     private JsonScope peekStack() {
567         return stack.get(stack.size() - 1);
568     }
569 
pop()570     private JsonScope pop() {
571         return stack.remove(stack.size() - 1);
572     }
573 
push(JsonScope newTop)574     private void push(JsonScope newTop) {
575         stack.add(newTop);
576     }
577 
578     /**
579      * Replace the value on the top of the stack with the given value.
580      */
replaceTop(JsonScope newTop)581     private void replaceTop(JsonScope newTop) {
582         stack.set(stack.size() - 1, newTop);
583     }
584 
nextInArray(boolean firstElement)585     private JsonToken nextInArray(boolean firstElement) throws IOException {
586         if (firstElement) {
587             replaceTop(JsonScope.NONEMPTY_ARRAY);
588         } else {
589             /* Look for a comma before each element after the first element. */
590             switch (nextNonWhitespace()) {
591                 case ']':
592                     pop();
593                     return token = JsonToken.END_ARRAY;
594                 case ';':
595                     checkLenient(); // fall-through
596                 case ',':
597                     break;
598                 default:
599                     throw syntaxError("Unterminated array");
600             }
601         }
602 
603         switch (nextNonWhitespace()) {
604             case ']':
605                 if (firstElement) {
606                     pop();
607                     return token = JsonToken.END_ARRAY;
608                 }
609                 // fall-through to handle ",]"
610             case ';':
611             case ',':
612                 /* In lenient mode, a 0-length literal means 'null' */
613                 checkLenient();
614                 pos--;
615                 value = "null";
616                 return token = JsonToken.NULL;
617             default:
618                 pos--;
619                 return nextValue();
620         }
621     }
622 
nextInObject(boolean firstElement)623     private JsonToken nextInObject(boolean firstElement) throws IOException {
624         /*
625          * Read delimiters. Either a comma/semicolon separating this and the
626          * previous name-value pair, or a close brace to denote the end of the
627          * object.
628          */
629         if (firstElement) {
630             /* Peek to see if this is the empty object. */
631             switch (nextNonWhitespace()) {
632                 case '}':
633                     pop();
634                     return token = JsonToken.END_OBJECT;
635                 default:
636                     pos--;
637             }
638         } else {
639             switch (nextNonWhitespace()) {
640                 case '}':
641                     pop();
642                     return token = JsonToken.END_OBJECT;
643                 case ';':
644                 case ',':
645                     break;
646                 default:
647                     throw syntaxError("Unterminated object");
648             }
649         }
650 
651         /* Read the name. */
652         int quote = nextNonWhitespace();
653         switch (quote) {
654             case '\'':
655                 checkLenient(); // fall-through
656             case '"':
657                 name = nextString((char) quote);
658                 break;
659             default:
660                 checkLenient();
661                 pos--;
662                 name = nextLiteral(false);
663                 if (name.isEmpty()) {
664                     throw syntaxError("Expected name");
665                 }
666         }
667 
668         replaceTop(JsonScope.DANGLING_NAME);
669         return token = JsonToken.NAME;
670     }
671 
objectValue()672     private JsonToken objectValue() throws IOException {
673         /*
674          * Read the name/value separator. Usually a colon ':'. In lenient mode
675          * we also accept an equals sign '=', or an arrow "=>".
676          */
677         switch (nextNonWhitespace()) {
678             case ':':
679                 break;
680             case '=':
681                 checkLenient();
682                 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
683                     pos++;
684                 }
685                 break;
686             default:
687                 throw syntaxError("Expected ':'");
688         }
689 
690         replaceTop(JsonScope.NONEMPTY_OBJECT);
691         return nextValue();
692     }
693 
nextValue()694     private JsonToken nextValue() throws IOException {
695         int c = nextNonWhitespace();
696         switch (c) {
697             case '{':
698                 push(JsonScope.EMPTY_OBJECT);
699                 return token = JsonToken.BEGIN_OBJECT;
700 
701             case '[':
702                 push(JsonScope.EMPTY_ARRAY);
703                 return token = JsonToken.BEGIN_ARRAY;
704 
705             case '\'':
706                 checkLenient(); // fall-through
707             case '"':
708                 value = nextString((char) c);
709                 return token = JsonToken.STRING;
710 
711             default:
712                 pos--;
713                 return readLiteral();
714         }
715     }
716 
717     /**
718      * Returns true once {@code limit - pos >= minimum}. If the data is
719      * exhausted before that many characters are available, this returns
720      * false.
721      */
fillBuffer(int minimum)722     private boolean fillBuffer(int minimum) throws IOException {
723         // Before clobbering the old characters, update where buffer starts
724         for (int i = 0; i < pos; i++) {
725             if (buffer[i] == '\n') {
726                 bufferStartLine++;
727                 bufferStartColumn = 1;
728             } else {
729                 bufferStartColumn++;
730             }
731         }
732 
733         if (limit != pos) {
734             limit -= pos;
735             System.arraycopy(buffer, pos, buffer, 0, limit);
736         } else {
737             limit = 0;
738         }
739 
740         pos = 0;
741         int total;
742         while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
743             limit += total;
744 
745             // if this is the first read, consume an optional byte order mark (BOM) if it exists
746                 if (bufferStartLine == 1 && bufferStartColumn == 1
747                         && limit > 0 && buffer[0] == '\ufeff') {
748                 pos++;
749                 bufferStartColumn--;
750             }
751 
752             if (limit >= minimum) {
753                 return true;
754             }
755         }
756         return false;
757     }
758 
getLineNumber()759     private int getLineNumber() {
760         int result = bufferStartLine;
761         for (int i = 0; i < pos; i++) {
762             if (buffer[i] == '\n') {
763                 result++;
764             }
765         }
766         return result;
767     }
768 
getColumnNumber()769     private int getColumnNumber() {
770         int result = bufferStartColumn;
771         for (int i = 0; i < pos; i++) {
772             if (buffer[i] == '\n') {
773                 result = 1;
774             } else {
775                 result++;
776             }
777         }
778         return result;
779     }
780 
nextNonWhitespace()781     private int nextNonWhitespace() throws IOException {
782         while (pos < limit || fillBuffer(1)) {
783             int c = buffer[pos++];
784             switch (c) {
785                 case '\t':
786                 case ' ':
787                 case '\n':
788                 case '\r':
789                     continue;
790 
791                 case '/':
792                     if (pos == limit && !fillBuffer(1)) {
793                         return c;
794                     }
795 
796                     checkLenient();
797                     char peek = buffer[pos];
798                     switch (peek) {
799                         case '*':
800                             // skip a /* c-style comment */
801                             pos++;
802                             if (!skipTo("*/")) {
803                                 throw syntaxError("Unterminated comment");
804                             }
805                             pos += 2;
806                             continue;
807 
808                         case '/':
809                             // skip a // end-of-line comment
810                             pos++;
811                             skipToEndOfLine();
812                             continue;
813 
814                         default:
815                             return c;
816                     }
817 
818                 case '#':
819                     /*
820                      * Skip a # hash end-of-line comment. The JSON RFC doesn't
821                      * specify this behaviour, but it's required to parse
822                      * existing documents. See http://b/2571423.
823                      */
824                     checkLenient();
825                     skipToEndOfLine();
826                     continue;
827 
828                 default:
829                     return c;
830             }
831         }
832 
833         throw new EOFException("End of input");
834     }
835 
checkLenient()836     private void checkLenient() throws IOException {
837         if (!lenient) {
838             throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
839         }
840     }
841 
842     /**
843      * Advances the position until after the next newline character. If the line
844      * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
845      * caller.
846      */
skipToEndOfLine()847     private void skipToEndOfLine() throws IOException {
848         while (pos < limit || fillBuffer(1)) {
849             char c = buffer[pos++];
850             if (c == '\r' || c == '\n') {
851                 break;
852             }
853         }
854     }
855 
skipTo(String toFind)856     private boolean skipTo(String toFind) throws IOException {
857         outer:
858         for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
859             for (int c = 0; c < toFind.length(); c++) {
860                 if (buffer[pos + c] != toFind.charAt(c)) {
861                     continue outer;
862                 }
863             }
864             return true;
865         }
866         return false;
867     }
868 
869     /**
870      * Returns the string up to but not including {@code quote}, unescaping any
871      * character escape sequences encountered along the way. The opening quote
872      * should have already been read. This consumes the closing quote, but does
873      * not include it in the returned string.
874      *
875      * @param quote either ' or ".
876      * @throws NumberFormatException if any unicode escape sequences are
877      *     malformed.
878      */
nextString(char quote)879     private String nextString(char quote) throws IOException {
880         StringBuilder builder = null;
881         do {
882             /* the index of the first character not yet appended to the builder. */
883             int start = pos;
884             while (pos < limit) {
885                 int c = buffer[pos++];
886 
887                 if (c == quote) {
888                     if (skipping) {
889                         return "skipped!";
890                     } else if (builder == null) {
891                         return stringPool.get(buffer, start, pos - start - 1);
892                     } else {
893                         builder.append(buffer, start, pos - start - 1);
894                         return builder.toString();
895                     }
896 
897                 } else if (c == '\\') {
898                     if (builder == null) {
899                         builder = new StringBuilder();
900                     }
901                     builder.append(buffer, start, pos - start - 1);
902                     builder.append(readEscapeCharacter());
903                     start = pos;
904                 }
905             }
906 
907             if (builder == null) {
908                 builder = new StringBuilder();
909             }
910             builder.append(buffer, start, pos - start);
911         } while (fillBuffer(1));
912 
913         throw syntaxError("Unterminated string");
914     }
915 
916     /**
917      * Reads the value up to but not including any delimiter characters. This
918      * does not consume the delimiter character.
919      *
920      * @param assignOffsetsOnly true for this method to only set the valuePos
921      *     and valueLength fields and return a null result. This only works if
922      *     the literal is short; a string is returned otherwise.
923      */
nextLiteral(boolean assignOffsetsOnly)924     private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
925         StringBuilder builder = null;
926         valuePos = -1;
927         valueLength = 0;
928         int i = 0;
929 
930         findNonLiteralCharacter:
931         while (true) {
932             for (; pos + i < limit; i++) {
933                 switch (buffer[pos + i]) {
934                 case '/':
935                 case '\\':
936                 case ';':
937                 case '#':
938                 case '=':
939                     checkLenient(); // fall-through
940                 case '{':
941                 case '}':
942                 case '[':
943                 case ']':
944                 case ':':
945                 case ',':
946                 case ' ':
947                 case '\t':
948                 case '\f':
949                 case '\r':
950                 case '\n':
951                     break findNonLiteralCharacter;
952                 }
953             }
954 
955             /*
956              * Attempt to load the entire literal into the buffer at once. If
957              * we run out of input, add a non-literal character at the end so
958              * that decoding doesn't need to do bounds checks.
959              */
960             if (i < buffer.length) {
961                 if (fillBuffer(i + 1)) {
962                     continue;
963                 } else {
964                     buffer[limit] = '\0';
965                     break;
966                 }
967             }
968 
969             // use a StringBuilder when the value is too long. It must be an unquoted string.
970             if (builder == null) {
971                 builder = new StringBuilder();
972             }
973             builder.append(buffer, pos, i);
974             valueLength += i;
975             pos += i;
976             i = 0;
977             if (!fillBuffer(1)) {
978                 break;
979             }
980         }
981 
982         String result;
983         if (assignOffsetsOnly && builder == null) {
984             valuePos = pos;
985             result = null;
986         } else if (skipping) {
987             result = "skipped!";
988         } else if (builder == null) {
989             result = stringPool.get(buffer, pos, i);
990         } else {
991             builder.append(buffer, pos, i);
992             result = builder.toString();
993         }
994         valueLength += i;
995         pos += i;
996         return result;
997     }
998 
toString()999     @Override public String toString() {
1000         return getClass().getSimpleName() + " near " + getSnippet();
1001     }
1002 
1003     /**
1004      * Unescapes the character identified by the character or characters that
1005      * immediately follow a backslash. The backslash '\' should have already
1006      * been read. This supports both unicode escapes "u000A" and two-character
1007      * escapes "\n".
1008      *
1009      * @throws NumberFormatException if any unicode escape sequences are
1010      *     malformed.
1011      */
readEscapeCharacter()1012     private char readEscapeCharacter() throws IOException {
1013         if (pos == limit && !fillBuffer(1)) {
1014             throw syntaxError("Unterminated escape sequence");
1015         }
1016 
1017         char escaped = buffer[pos++];
1018         switch (escaped) {
1019             case 'u':
1020                 if (pos + 4 > limit && !fillBuffer(4)) {
1021                     throw syntaxError("Unterminated escape sequence");
1022                 }
1023                 String hex = stringPool.get(buffer, pos, 4);
1024                 pos += 4;
1025                 return (char) Integer.parseInt(hex, 16);
1026 
1027             case 't':
1028                 return '\t';
1029 
1030             case 'b':
1031                 return '\b';
1032 
1033             case 'n':
1034                 return '\n';
1035 
1036             case 'r':
1037                 return '\r';
1038 
1039             case 'f':
1040                 return '\f';
1041 
1042             case '\'':
1043             case '"':
1044             case '\\':
1045             default:
1046                 return escaped;
1047         }
1048     }
1049 
1050     /**
1051      * Reads a null, boolean, numeric or unquoted string literal value.
1052      */
readLiteral()1053     private JsonToken readLiteral() throws IOException {
1054         value = nextLiteral(true);
1055         if (valueLength == 0) {
1056             throw syntaxError("Expected literal value");
1057         }
1058         token = decodeLiteral();
1059         if (token == JsonToken.STRING) {
1060           checkLenient();
1061         }
1062         return token;
1063     }
1064 
1065     /**
1066      * Assigns {@code nextToken} based on the value of {@code nextValue}.
1067      */
decodeLiteral()1068     private JsonToken decodeLiteral() throws IOException {
1069         if (valuePos == -1) {
1070             // it was too long to fit in the buffer so it can only be a string
1071             return JsonToken.STRING;
1072         } else if (valueLength == 4
1073                 && ('n' == buffer[valuePos    ] || 'N' == buffer[valuePos    ])
1074                 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1075                 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1076                 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1077             value = "null";
1078             return JsonToken.NULL;
1079         } else if (valueLength == 4
1080                 && ('t' == buffer[valuePos    ] || 'T' == buffer[valuePos    ])
1081                 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1082                 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1083                 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1084             value = TRUE;
1085             return JsonToken.BOOLEAN;
1086         } else if (valueLength == 5
1087                 && ('f' == buffer[valuePos    ] || 'F' == buffer[valuePos    ])
1088                 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1089                 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1090                 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1091                 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1092             value = FALSE;
1093             return JsonToken.BOOLEAN;
1094         } else {
1095             value = stringPool.get(buffer, valuePos, valueLength);
1096             return decodeNumber(buffer, valuePos, valueLength);
1097         }
1098     }
1099 
1100     /**
1101      * Determine whether the characters is a JSON number. Numbers are of the
1102      * form -12.34e+56. Fractional and exponential parts are optional. Leading
1103      * zeroes are not allowed in the value or exponential part, but are allowed
1104      * in the fraction.
1105      */
decodeNumber(char[] chars, int offset, int length)1106     private JsonToken decodeNumber(char[] chars, int offset, int length) {
1107         int i = offset;
1108         int c = chars[i];
1109 
1110         if (c == '-') {
1111             c = chars[++i];
1112         }
1113 
1114         if (c == '0') {
1115             c = chars[++i];
1116         } else if (c >= '1' && c <= '9') {
1117             c = chars[++i];
1118             while (c >= '0' && c <= '9') {
1119                 c = chars[++i];
1120             }
1121         } else {
1122             return JsonToken.STRING;
1123         }
1124 
1125         if (c == '.') {
1126             c = chars[++i];
1127             while (c >= '0' && c <= '9') {
1128                 c = chars[++i];
1129             }
1130         }
1131 
1132         if (c == 'e' || c == 'E') {
1133             c = chars[++i];
1134             if (c == '+' || c == '-') {
1135                 c = chars[++i];
1136             }
1137             if (c >= '0' && c <= '9') {
1138                 c = chars[++i];
1139                 while (c >= '0' && c <= '9') {
1140                     c = chars[++i];
1141                 }
1142             } else {
1143                 return JsonToken.STRING;
1144             }
1145         }
1146 
1147         if (i == offset + length) {
1148             return JsonToken.NUMBER;
1149         } else {
1150             return JsonToken.STRING;
1151         }
1152     }
1153 
1154     /**
1155      * Throws a new IO exception with the given message and a context snippet
1156      * with this reader's content.
1157      */
syntaxError(String message)1158     private IOException syntaxError(String message) throws IOException {
1159         throw new MalformedJsonException(message
1160                 + " at line " + getLineNumber() + " column " + getColumnNumber());
1161     }
1162 
getSnippet()1163     private CharSequence getSnippet() {
1164         StringBuilder snippet = new StringBuilder();
1165         int beforePos = Math.min(pos, 20);
1166         snippet.append(buffer, pos - beforePos, beforePos);
1167         int afterPos = Math.min(limit - pos, 20);
1168         snippet.append(buffer, pos, afterPos);
1169         return snippet;
1170     }
1171 }
1172