1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import com.google.protobuf.Descriptors.Descriptor;
34 import com.google.protobuf.Descriptors.EnumDescriptor;
35 import com.google.protobuf.Descriptors.EnumValueDescriptor;
36 import com.google.protobuf.Descriptors.FieldDescriptor;
37 
38 import java.io.IOException;
39 import java.math.BigInteger;
40 import java.nio.CharBuffer;
41 import java.util.ArrayList;
42 import java.util.List;
43 import java.util.Locale;
44 import java.util.Map;
45 import java.util.logging.Logger;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48 
49 /**
50  * Provide text parsing and formatting support for proto2 instances.
51  * The implementation largely follows google/protobuf/text_format.cc.
52  *
53  * @author wenboz@google.com Wenbo Zhu
54  * @author kenton@google.com Kenton Varda
55  */
56 public final class TextFormat {
  /** Private so that this class cannot be instantiated from outside. */
  private TextFormat() {}
58 
  /** Logger named after this class. */
  private static final Logger logger =
      Logger.getLogger(TextFormat.class.getName());

  /** Printer with the default settings: multi-line output, non-ASCII escaped. */
  private static final Printer DEFAULT_PRINTER = new Printer();
  /** Printer that separates entries with spaces instead of newlines. */
  private static final Printer SINGLE_LINE_PRINTER =
      (new Printer()).setSingleLineMode(true);
  /** Multi-line printer that does not escape non-ASCII characters in strings. */
  private static final Printer UNICODE_PRINTER =
      (new Printer()).setEscapeNonAscii(false);
67 
68   /**
69    * Outputs a textual representation of the Protocol Message supplied into
70    * the parameter output. (This representation is the new version of the
71    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
72    */
print( final MessageOrBuilder message, final Appendable output)73   public static void print(
74       final MessageOrBuilder message, final Appendable output)
75       throws IOException {
76     DEFAULT_PRINTER.print(message, new TextGenerator(output));
77   }
78 
79   /** Outputs a textual representation of {@code fields} to {@code output}. */
print(final UnknownFieldSet fields, final Appendable output)80   public static void print(final UnknownFieldSet fields,
81                            final Appendable output)
82                            throws IOException {
83     DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
84   }
85 
86   /**
87    * Same as {@code print()}, except that non-ASCII characters are not
88    * escaped.
89    */
printUnicode( final MessageOrBuilder message, final Appendable output)90   public static void printUnicode(
91       final MessageOrBuilder message, final Appendable output)
92       throws IOException {
93     UNICODE_PRINTER.print(message, new TextGenerator(output));
94   }
95 
96   /**
97    * Same as {@code print()}, except that non-ASCII characters are not
98    * escaped.
99    */
printUnicode(final UnknownFieldSet fields, final Appendable output)100   public static void printUnicode(final UnknownFieldSet fields,
101                                   final Appendable output)
102                                   throws IOException {
103     UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(output));
104   }
105 
106   /**
107    * Generates a human readable form of this message, useful for debugging and
108    * other purposes, with no newline characters.
109    */
shortDebugString(final MessageOrBuilder message)110   public static String shortDebugString(final MessageOrBuilder message) {
111     try {
112       final StringBuilder sb = new StringBuilder();
113       SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
114       // Single line mode currently might have an extra space at the end.
115       return sb.toString().trim();
116     } catch (IOException e) {
117       throw new IllegalStateException(e);
118     }
119   }
120 
121   /**
122    * Generates a human readable form of the field, useful for debugging
123    * and other purposes, with no newline characters.
124    */
shortDebugString(final FieldDescriptor field, final Object value)125   public static String shortDebugString(final FieldDescriptor field,
126                                         final Object value) {
127     try {
128       final StringBuilder sb = new StringBuilder();
129       SINGLE_LINE_PRINTER.printField(field, value, new TextGenerator(sb));
130       return sb.toString().trim();
131     } catch (IOException e) {
132         throw new IllegalStateException(e);
133     }
134   }
135 
136   /**
137    * Generates a human readable form of the unknown fields, useful for debugging
138    * and other purposes, with no newline characters.
139    */
shortDebugString(final UnknownFieldSet fields)140   public static String shortDebugString(final UnknownFieldSet fields) {
141     try {
142       final StringBuilder sb = new StringBuilder();
143       SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
144       // Single line mode currently might have an extra space at the end.
145       return sb.toString().trim();
146     } catch (IOException e) {
147       throw new IllegalStateException(e);
148     }
149   }
150 
151   /**
152    * Like {@code print()}, but writes directly to a {@code String} and
153    * returns it.
154    */
printToString(final MessageOrBuilder message)155   public static String printToString(final MessageOrBuilder message) {
156     try {
157       final StringBuilder text = new StringBuilder();
158       print(message, text);
159       return text.toString();
160     } catch (IOException e) {
161       throw new IllegalStateException(e);
162     }
163   }
164 
165   /**
166    * Like {@code print()}, but writes directly to a {@code String} and
167    * returns it.
168    */
printToString(final UnknownFieldSet fields)169   public static String printToString(final UnknownFieldSet fields) {
170     try {
171       final StringBuilder text = new StringBuilder();
172       print(fields, text);
173       return text.toString();
174     } catch (IOException e) {
175       throw new IllegalStateException(e);
176     }
177   }
178 
179   /**
180    * Same as {@code printToString()}, except that non-ASCII characters
181    * in string type fields are not escaped in backslash+octals.
182    */
printToUnicodeString(final MessageOrBuilder message)183   public static String printToUnicodeString(final MessageOrBuilder message) {
184     try {
185       final StringBuilder text = new StringBuilder();
186       UNICODE_PRINTER.print(message, new TextGenerator(text));
187       return text.toString();
188     } catch (IOException e) {
189       throw new IllegalStateException(e);
190     }
191   }
192 
193   /**
194    * Same as {@code printToString()}, except that non-ASCII characters
195    * in string type fields are not escaped in backslash+octals.
196    */
printToUnicodeString(final UnknownFieldSet fields)197   public static String printToUnicodeString(final UnknownFieldSet fields) {
198     try {
199       final StringBuilder text = new StringBuilder();
200       UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
201       return text.toString();
202     } catch (IOException e) {
203       throw new IllegalStateException(e);
204     }
205   }
206 
printField(final FieldDescriptor field, final Object value, final Appendable output)207   public static void printField(final FieldDescriptor field,
208                                 final Object value,
209                                 final Appendable output)
210                                 throws IOException {
211     DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
212   }
213 
printFieldToString(final FieldDescriptor field, final Object value)214   public static String printFieldToString(final FieldDescriptor field,
215                                           final Object value) {
216     try {
217       final StringBuilder text = new StringBuilder();
218       printField(field, value, text);
219       return text.toString();
220     } catch (IOException e) {
221       throw new IllegalStateException(e);
222     }
223   }
224 
225   /**
226    * Outputs a textual representation of the value of given field value.
227    *
228    * @param field the descriptor of the field
229    * @param value the value of the field
230    * @param output the output to which to append the formatted value
231    * @throws ClassCastException if the value is not appropriate for the
232    *     given field descriptor
233    * @throws IOException if there is an exception writing to the output
234    */
printFieldValue(final FieldDescriptor field, final Object value, final Appendable output)235   public static void printFieldValue(final FieldDescriptor field,
236                                      final Object value,
237                                      final Appendable output)
238                                      throws IOException {
239     DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
240   }
241 
242   /**
243    * Outputs a textual representation of the value of an unknown field.
244    *
245    * @param tag the field's tag number
246    * @param value the value of the field
247    * @param output the output to which to append the formatted value
248    * @throws ClassCastException if the value is not appropriate for the
249    *     given field descriptor
250    * @throws IOException if there is an exception writing to the output
251    */
printUnknownFieldValue(final int tag, final Object value, final Appendable output)252   public static void printUnknownFieldValue(final int tag,
253                                             final Object value,
254                                             final Appendable output)
255                                             throws IOException {
256     printUnknownFieldValue(tag, value, new TextGenerator(output));
257   }
258 
printUnknownFieldValue(final int tag, final Object value, final TextGenerator generator)259   private static void printUnknownFieldValue(final int tag,
260                                              final Object value,
261                                              final TextGenerator generator)
262                                              throws IOException {
263     switch (WireFormat.getTagWireType(tag)) {
264       case WireFormat.WIRETYPE_VARINT:
265         generator.print(unsignedToString((Long) value));
266         break;
267       case WireFormat.WIRETYPE_FIXED32:
268         generator.print(
269             String.format((Locale) null, "0x%08x", (Integer) value));
270         break;
271       case WireFormat.WIRETYPE_FIXED64:
272         generator.print(String.format((Locale) null, "0x%016x", (Long) value));
273         break;
274       case WireFormat.WIRETYPE_LENGTH_DELIMITED:
275         generator.print("\"");
276         generator.print(escapeBytes((ByteString) value));
277         generator.print("\"");
278         break;
279       case WireFormat.WIRETYPE_START_GROUP:
280         DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
281         break;
282       default:
283         throw new IllegalArgumentException("Bad tag: " + tag);
284     }
285   }
286 
287   /** Helper class for converting protobufs to text. */
288   private static final class Printer {
289     /** Whether to omit newlines from the output. */
290     boolean singleLineMode = false;
291 
292     /** Whether to escape non ASCII characters with backslash and octal. */
293     boolean escapeNonAscii = true;
294 
Printer()295     private Printer() {}
296 
297     /** Setter of singleLineMode */
setSingleLineMode(boolean singleLineMode)298     private Printer setSingleLineMode(boolean singleLineMode) {
299       this.singleLineMode = singleLineMode;
300       return this;
301     }
302 
303     /** Setter of escapeNonAscii */
setEscapeNonAscii(boolean escapeNonAscii)304     private Printer setEscapeNonAscii(boolean escapeNonAscii) {
305       this.escapeNonAscii = escapeNonAscii;
306       return this;
307     }
308 
print( final MessageOrBuilder message, final TextGenerator generator)309     private void print(
310         final MessageOrBuilder message, final TextGenerator generator)
311         throws IOException {
312       for (Map.Entry<FieldDescriptor, Object> field
313           : message.getAllFields().entrySet()) {
314         printField(field.getKey(), field.getValue(), generator);
315       }
316       printUnknownFields(message.getUnknownFields(), generator);
317     }
318 
printField(final FieldDescriptor field, final Object value, final TextGenerator generator)319     private void printField(final FieldDescriptor field, final Object value,
320         final TextGenerator generator) throws IOException {
321       if (field.isRepeated()) {
322         // Repeated field.  Print each element.
323         for (Object element : (List<?>) value) {
324           printSingleField(field, element, generator);
325         }
326       } else {
327         printSingleField(field, value, generator);
328       }
329     }
330 
printSingleField(final FieldDescriptor field, final Object value, final TextGenerator generator)331     private void printSingleField(final FieldDescriptor field,
332                                   final Object value,
333                                   final TextGenerator generator)
334                                   throws IOException {
335       if (field.isExtension()) {
336         generator.print("[");
337         // We special-case MessageSet elements for compatibility with proto1.
338         if (field.getContainingType().getOptions().getMessageSetWireFormat()
339             && (field.getType() == FieldDescriptor.Type.MESSAGE)
340             && (field.isOptional())
341             // object equality
342             && (field.getExtensionScope() == field.getMessageType())) {
343           generator.print(field.getMessageType().getFullName());
344         } else {
345           generator.print(field.getFullName());
346         }
347         generator.print("]");
348       } else {
349         if (field.getType() == FieldDescriptor.Type.GROUP) {
350           // Groups must be serialized with their original capitalization.
351           generator.print(field.getMessageType().getName());
352         } else {
353           generator.print(field.getName());
354         }
355       }
356 
357       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
358         if (singleLineMode) {
359           generator.print(" { ");
360         } else {
361           generator.print(" {\n");
362           generator.indent();
363         }
364       } else {
365         generator.print(": ");
366       }
367 
368       printFieldValue(field, value, generator);
369 
370       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
371         if (singleLineMode) {
372           generator.print("} ");
373         } else {
374           generator.outdent();
375           generator.print("}\n");
376         }
377       } else {
378         if (singleLineMode) {
379           generator.print(" ");
380         } else {
381           generator.print("\n");
382         }
383       }
384     }
385 
printFieldValue(final FieldDescriptor field, final Object value, final TextGenerator generator)386     private void printFieldValue(final FieldDescriptor field,
387                                  final Object value,
388                                  final TextGenerator generator)
389                                  throws IOException {
390       switch (field.getType()) {
391         case INT32:
392         case SINT32:
393         case SFIXED32:
394           generator.print(((Integer) value).toString());
395           break;
396 
397         case INT64:
398         case SINT64:
399         case SFIXED64:
400           generator.print(((Long) value).toString());
401           break;
402 
403         case BOOL:
404           generator.print(((Boolean) value).toString());
405           break;
406 
407         case FLOAT:
408           generator.print(((Float) value).toString());
409           break;
410 
411         case DOUBLE:
412           generator.print(((Double) value).toString());
413           break;
414 
415         case UINT32:
416         case FIXED32:
417           generator.print(unsignedToString((Integer) value));
418           break;
419 
420         case UINT64:
421         case FIXED64:
422           generator.print(unsignedToString((Long) value));
423           break;
424 
425         case STRING:
426           generator.print("\"");
427           generator.print(escapeNonAscii
428               ? TextFormatEscaper.escapeText((String) value)
429               : escapeDoubleQuotesAndBackslashes((String) value)
430                   .replace("\n", "\\n"));
431           generator.print("\"");
432           break;
433 
434         case BYTES:
435           generator.print("\"");
436           if (value instanceof ByteString) {
437             generator.print(escapeBytes((ByteString) value));
438           } else {
439             generator.print(escapeBytes((byte[]) value));
440           }
441           generator.print("\"");
442           break;
443 
444         case ENUM:
445           generator.print(((EnumValueDescriptor) value).getName());
446           break;
447 
448         case MESSAGE:
449         case GROUP:
450           print((Message) value, generator);
451           break;
452       }
453     }
454 
printUnknownFields(final UnknownFieldSet unknownFields, final TextGenerator generator)455     private void printUnknownFields(final UnknownFieldSet unknownFields,
456                                     final TextGenerator generator)
457                                     throws IOException {
458       for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
459                unknownFields.asMap().entrySet()) {
460         final int number = entry.getKey();
461         final UnknownFieldSet.Field field = entry.getValue();
462         printUnknownField(number, WireFormat.WIRETYPE_VARINT,
463             field.getVarintList(), generator);
464         printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
465             field.getFixed32List(), generator);
466         printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
467             field.getFixed64List(), generator);
468         printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
469             field.getLengthDelimitedList(), generator);
470         for (final UnknownFieldSet value : field.getGroupList()) {
471           generator.print(entry.getKey().toString());
472           if (singleLineMode) {
473             generator.print(" { ");
474           } else {
475             generator.print(" {\n");
476             generator.indent();
477           }
478           printUnknownFields(value, generator);
479           if (singleLineMode) {
480             generator.print("} ");
481           } else {
482             generator.outdent();
483             generator.print("}\n");
484           }
485         }
486       }
487     }
488 
printUnknownField(final int number, final int wireType, final List<?> values, final TextGenerator generator)489     private void printUnknownField(final int number,
490                                    final int wireType,
491                                    final List<?> values,
492                                    final TextGenerator generator)
493                                    throws IOException {
494       for (final Object value : values) {
495         generator.print(String.valueOf(number));
496         generator.print(": ");
497         printUnknownFieldValue(wireType, value, generator);
498         generator.print(singleLineMode ? " " : "\n");
499       }
500     }
501   }
502 
503   /** Convert an unsigned 32-bit integer to a string. */
unsignedToString(final int value)504   public static String unsignedToString(final int value) {
505     if (value >= 0) {
506       return Integer.toString(value);
507     } else {
508       return Long.toString(value & 0x00000000FFFFFFFFL);
509     }
510   }
511 
512   /** Convert an unsigned 64-bit integer to a string. */
unsignedToString(final long value)513   public static String unsignedToString(final long value) {
514     if (value >= 0) {
515       return Long.toString(value);
516     } else {
517       // Pull off the most-significant bit so that BigInteger doesn't think
518       // the number is negative, then set it again using setBit().
519       return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL)
520                        .setBit(63).toString();
521     }
522   }
523 
524   /**
525    * An inner class for writing text to the output stream.
526    */
527   private static final class TextGenerator {
528     private final Appendable output;
529     private final StringBuilder indent = new StringBuilder();
530     private boolean atStartOfLine = true;
531 
TextGenerator(final Appendable output)532     private TextGenerator(final Appendable output) {
533       this.output = output;
534     }
535 
536     /**
537      * Indent text by two spaces.  After calling Indent(), two spaces will be
538      * inserted at the beginning of each line of text.  Indent() may be called
539      * multiple times to produce deeper indents.
540      */
indent()541     public void indent() {
542       indent.append("  ");
543     }
544 
545     /**
546      * Reduces the current indent level by two spaces, or crashes if the indent
547      * level is zero.
548      */
outdent()549     public void outdent() {
550       final int length = indent.length();
551       if (length == 0) {
552         throw new IllegalArgumentException(
553             " Outdent() without matching Indent().");
554       }
555       indent.delete(length - 2, length);
556     }
557 
558     /**
559      * Print text to the output stream.
560      */
print(final CharSequence text)561     public void print(final CharSequence text) throws IOException {
562       final int size = text.length();
563       int pos = 0;
564 
565       for (int i = 0; i < size; i++) {
566         if (text.charAt(i) == '\n') {
567           write(text.subSequence(pos, i + 1));
568           pos = i + 1;
569           atStartOfLine = true;
570         }
571       }
572       write(text.subSequence(pos, size));
573     }
574 
write(final CharSequence data)575     private void write(final CharSequence data) throws IOException {
576       if (data.length() == 0) {
577         return;
578       }
579       if (atStartOfLine) {
580         atStartOfLine = false;
581         output.append(indent);
582       }
583       output.append(data);
584     }
585   }
586 
587   // =================================================================
588   // Parsing
589 
590   /**
591    * Represents a stream of tokens parsed from a {@code String}.
592    *
593    * <p>The Java standard library provides many classes that you might think
594    * would be useful for implementing this, but aren't.  For example:
595    *
596    * <ul>
597    * <li>{@code java.io.StreamTokenizer}:  This almost does what we want -- or,
598    *   at least, something that would get us close to what we want -- except
599    *   for one fatal flaw:  It automatically un-escapes strings using Java
600    *   escape sequences, which do not include all the escape sequences we
601    *   need to support (e.g. '\x').
602    * <li>{@code java.util.Scanner}:  This seems like a great way at least to
603    *   parse regular expressions out of a stream (so we wouldn't have to load
604    *   the entire input into a single string before parsing).  Sadly,
605    *   {@code Scanner} requires that tokens be delimited with some delimiter.
606    *   Thus, although the text "foo:" should parse to two tokens ("foo" and
607    *   ":"), {@code Scanner} would recognize it only as a single token.
608    *   Furthermore, {@code Scanner} provides no way to inspect the contents
609    *   of delimiters, making it impossible to keep track of line and column
610    *   numbers.
611    * </ul>
612    *
613    * <p>Luckily, Java's regular expression support does manage to be useful to
614    * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
615    * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
616    * that we need to have the entire input in one contiguous string.
617    */
618   private static final class Tokenizer {
    // The complete input being tokenized.
    private final CharSequence text;
    // Shared matcher over text; its pattern is switched between WHITESPACE
    // and TOKEN, and its region start marks where the next token begins.
    private final Matcher matcher;
    // Text of the current token; the empty string once the input is exhausted.
    private String currentToken;

    // The character index within this.text at which the current token begins.
    private int pos = 0;

    // The line and column numbers of the current token.  Both start at 0.
    private int line = 0;
    private int column = 0;

    // The line and column numbers of the previous token (allows throwing
    // errors *after* consuming).
    private int previousLine = 0;
    private int previousColumn = 0;

    // We use possessive quantifiers (*+ and ++) because otherwise the Java
    // regex matcher has stack overflows on large inputs.
    // WHITESPACE matches a run of whitespace characters and '#' comments
    // (each comment extends to the end of its line).
    private static final Pattern WHITESPACE =
      Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
    // TOKEN matches one multi-character token; anything it does not match is
    // consumed one character at a time by nextToken().
    private static final Pattern TOKEN = Pattern.compile(
      "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
      "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
      "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
      "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
      Pattern.MULTILINE);

    // Matches "inf" or "infinity", optionally negated, in any letter case.
    private static final Pattern DOUBLE_INFINITY = Pattern.compile(
      "-?inf(inity)?",
      Pattern.CASE_INSENSITIVE);
    // Same as DOUBLE_INFINITY but also accepts a trailing "f".
    private static final Pattern FLOAT_INFINITY = Pattern.compile(
      "-?inf(inity)?f?",
      Pattern.CASE_INSENSITIVE);
    // Matches "nan" with an optional trailing "f", in any letter case.
    private static final Pattern FLOAT_NAN = Pattern.compile(
      "nanf?",
      Pattern.CASE_INSENSITIVE);
655 
    /**
     * Construct a tokenizer that parses tokens from the given text.  Leading
     * whitespace and comments are skipped and the first token is loaded, so
     * the tokenizer is immediately positioned on a token (or at the end of
     * the input).
     */
    private Tokenizer(final CharSequence text) {
      this.text = text;
      this.matcher = WHITESPACE.matcher(text);
      skipWhitespace();
      nextToken();
    }
663 
    /**
     * Returns the line number that was current before the most recent call
     * to {@code nextToken()}.  Line numbers start at 0.
     */
    int getPreviousLine() {
      return previousLine;
    }
667 
    /**
     * Returns the column number that was current before the most recent call
     * to {@code nextToken()}.  Column numbers start at 0.
     */
    int getPreviousColumn() {
      return previousColumn;
    }
671 
    /** Returns the line number of the current token.  Line numbers start at 0. */
    int getLine() {
      return line;
    }
675 
    /** Returns the column number of the current token.  Column numbers start at 0. */
    int getColumn() {
      return column;
    }
679 
680     /** Are we at the end of the input? */
atEnd()681     public boolean atEnd() {
682       return currentToken.length() == 0;
683     }
684 
685     /** Advance to the next token. */
nextToken()686     public void nextToken() {
687       previousLine = line;
688       previousColumn = column;
689 
690       // Advance the line counter to the current position.
691       while (pos < matcher.regionStart()) {
692         if (text.charAt(pos) == '\n') {
693           ++line;
694           column = 0;
695         } else {
696           ++column;
697         }
698         ++pos;
699       }
700 
701       // Match the next token.
702       if (matcher.regionStart() == matcher.regionEnd()) {
703         // EOF
704         currentToken = "";
705       } else {
706         matcher.usePattern(TOKEN);
707         if (matcher.lookingAt()) {
708           currentToken = matcher.group();
709           matcher.region(matcher.end(), matcher.regionEnd());
710         } else {
711           // Take one character.
712           currentToken = String.valueOf(text.charAt(pos));
713           matcher.region(pos + 1, matcher.regionEnd());
714         }
715 
716         skipWhitespace();
717       }
718     }
719 
720     /**
721      * Skip over any whitespace so that the matcher region starts at the next
722      * token.
723      */
skipWhitespace()724     private void skipWhitespace() {
725       matcher.usePattern(WHITESPACE);
726       if (matcher.lookingAt()) {
727         matcher.region(matcher.end(), matcher.regionEnd());
728       }
729     }
730 
731     /**
732      * If the next token exactly matches {@code token}, consume it and return
733      * {@code true}.  Otherwise, return {@code false} without doing anything.
734      */
tryConsume(final String token)735     public boolean tryConsume(final String token) {
736       if (currentToken.equals(token)) {
737         nextToken();
738         return true;
739       } else {
740         return false;
741       }
742     }
743 
744     /**
745      * If the next token exactly matches {@code token}, consume it.  Otherwise,
746      * throw a {@link ParseException}.
747      */
consume(final String token)748     public void consume(final String token) throws ParseException {
749       if (!tryConsume(token)) {
750         throw parseException("Expected \"" + token + "\".");
751       }
752     }
753 
754     /**
755      * Returns {@code true} if the next token is an integer, but does
756      * not consume it.
757      */
lookingAtInteger()758     public boolean lookingAtInteger() {
759       if (currentToken.length() == 0) {
760         return false;
761       }
762 
763       final char c = currentToken.charAt(0);
764       return ('0' <= c && c <= '9')
765           || c == '-' || c == '+';
766     }
767 
768     /**
769      * Returns {@code true} if the current token's text is equal to that
770      * specified.
771      */
lookingAt(String text)772     public boolean lookingAt(String text) {
773       return currentToken.equals(text);
774     }
775 
776     /**
777      * If the next token is an identifier, consume it and return its value.
778      * Otherwise, throw a {@link ParseException}.
779      */
consumeIdentifier()780     public String consumeIdentifier() throws ParseException {
781       for (int i = 0; i < currentToken.length(); i++) {
782         final char c = currentToken.charAt(i);
783         if (('a' <= c && c <= 'z')
784             || ('A' <= c && c <= 'Z')
785             || ('0' <= c && c <= '9')
786             || (c == '_') || (c == '.')) {
787           // OK
788         } else {
789           throw parseException(
790               "Expected identifier. Found '" + currentToken + "'");
791         }
792       }
793 
794       final String result = currentToken;
795       nextToken();
796       return result;
797     }
798 
799     /**
800      * If the next token is an identifier, consume it and return {@code true}.
801      * Otherwise, return {@code false} without doing anything.
802      */
tryConsumeIdentifier()803     public boolean tryConsumeIdentifier() {
804       try {
805         consumeIdentifier();
806         return true;
807       } catch (ParseException e) {
808         return false;
809       }
810     }
811 
812     /**
813      * If the next token is a 32-bit signed integer, consume it and return its
814      * value.  Otherwise, throw a {@link ParseException}.
815      */
consumeInt32()816     public int consumeInt32() throws ParseException {
817       try {
818         final int result = parseInt32(currentToken);
819         nextToken();
820         return result;
821       } catch (NumberFormatException e) {
822         throw integerParseException(e);
823       }
824     }
825 
826     /**
827      * If the next token is a 32-bit unsigned integer, consume it and return its
828      * value.  Otherwise, throw a {@link ParseException}.
829      */
consumeUInt32()830     public int consumeUInt32() throws ParseException {
831       try {
832         final int result = parseUInt32(currentToken);
833         nextToken();
834         return result;
835       } catch (NumberFormatException e) {
836         throw integerParseException(e);
837       }
838     }
839 
840     /**
841      * If the next token is a 64-bit signed integer, consume it and return its
842      * value.  Otherwise, throw a {@link ParseException}.
843      */
consumeInt64()844     public long consumeInt64() throws ParseException {
845       try {
846         final long result = parseInt64(currentToken);
847         nextToken();
848         return result;
849       } catch (NumberFormatException e) {
850         throw integerParseException(e);
851       }
852     }
853 
854     /**
855      * If the next token is a 64-bit signed integer, consume it and return
856      * {@code true}.  Otherwise, return {@code false} without doing anything.
857      */
tryConsumeInt64()858     public boolean tryConsumeInt64() {
859       try {
860         consumeInt64();
861         return true;
862       } catch (ParseException e) {
863         return false;
864       }
865     }
866 
867     /**
868      * If the next token is a 64-bit unsigned integer, consume it and return its
869      * value.  Otherwise, throw a {@link ParseException}.
870      */
consumeUInt64()871     public long consumeUInt64() throws ParseException {
872       try {
873         final long result = parseUInt64(currentToken);
874         nextToken();
875         return result;
876       } catch (NumberFormatException e) {
877         throw integerParseException(e);
878       }
879     }
880 
881     /**
882      * If the next token is a 64-bit unsigned integer, consume it and return
883      * {@code true}.  Otherwise, return {@code false} without doing anything.
884      */
tryConsumeUInt64()885     public boolean tryConsumeUInt64() {
886       try {
887         consumeUInt64();
888         return true;
889       } catch (ParseException e) {
890         return false;
891       }
892     }
893 
894     /**
895      * If the next token is a double, consume it and return its value.
896      * Otherwise, throw a {@link ParseException}.
897      */
consumeDouble()898     public double consumeDouble() throws ParseException {
899       // We need to parse infinity and nan separately because
900       // Double.parseDouble() does not accept "inf", "infinity", or "nan".
901       if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
902         final boolean negative = currentToken.startsWith("-");
903         nextToken();
904         return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
905       }
906       if (currentToken.equalsIgnoreCase("nan")) {
907         nextToken();
908         return Double.NaN;
909       }
910       try {
911         final double result = Double.parseDouble(currentToken);
912         nextToken();
913         return result;
914       } catch (NumberFormatException e) {
915         throw floatParseException(e);
916       }
917     }
918 
919     /**
920      * If the next token is a double, consume it and return {@code true}.
921      * Otherwise, return {@code false} without doing anything.
922      */
tryConsumeDouble()923     public boolean tryConsumeDouble() {
924       try {
925         consumeDouble();
926         return true;
927       } catch (ParseException e) {
928         return false;
929       }
930     }
931 
932     /**
933      * If the next token is a float, consume it and return its value.
934      * Otherwise, throw a {@link ParseException}.
935      */
consumeFloat()936     public float consumeFloat() throws ParseException {
937       // We need to parse infinity and nan separately because
938       // Float.parseFloat() does not accept "inf", "infinity", or "nan".
939       if (FLOAT_INFINITY.matcher(currentToken).matches()) {
940         final boolean negative = currentToken.startsWith("-");
941         nextToken();
942         return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
943       }
944       if (FLOAT_NAN.matcher(currentToken).matches()) {
945         nextToken();
946         return Float.NaN;
947       }
948       try {
949         final float result = Float.parseFloat(currentToken);
950         nextToken();
951         return result;
952       } catch (NumberFormatException e) {
953         throw floatParseException(e);
954       }
955     }
956 
957     /**
958      * If the next token is a float, consume it and return {@code true}.
959      * Otherwise, return {@code false} without doing anything.
960      */
tryConsumeFloat()961     public boolean tryConsumeFloat() {
962       try {
963         consumeFloat();
964         return true;
965       } catch (ParseException e) {
966         return false;
967       }
968     }
969 
970     /**
971      * If the next token is a boolean, consume it and return its value.
972      * Otherwise, throw a {@link ParseException}.
973      */
consumeBoolean()974     public boolean consumeBoolean() throws ParseException {
975       if (currentToken.equals("true")
976           || currentToken.equals("True")
977           || currentToken.equals("t")
978           || currentToken.equals("1")) {
979         nextToken();
980         return true;
981       } else if (currentToken.equals("false")
982           || currentToken.equals("False")
983           || currentToken.equals("f")
984           || currentToken.equals("0")) {
985         nextToken();
986         return false;
987       } else {
988         throw parseException("Expected \"true\" or \"false\".");
989       }
990     }
991 
992     /**
993      * If the next token is a string, consume it and return its (unescaped)
994      * value.  Otherwise, throw a {@link ParseException}.
995      */
consumeString()996     public String consumeString() throws ParseException {
997       return consumeByteString().toStringUtf8();
998     }
999 
1000     /**
1001      * If the next token is a string, consume it and return true.  Otherwise,
1002      * return false.
1003      */
tryConsumeString()1004     public boolean tryConsumeString() {
1005       try {
1006         consumeString();
1007         return true;
1008       } catch (ParseException e) {
1009         return false;
1010       }
1011     }
1012 
1013     /**
1014      * If the next token is a string, consume it, unescape it as a
1015      * {@link ByteString}, and return it.  Otherwise, throw a
1016      * {@link ParseException}.
1017      */
consumeByteString()1018     public ByteString consumeByteString() throws ParseException {
1019       List<ByteString> list = new ArrayList<ByteString>();
1020       consumeByteString(list);
1021       while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
1022         consumeByteString(list);
1023       }
1024       return ByteString.copyFrom(list);
1025     }
1026 
1027     /**
1028      * Like {@link #consumeByteString()} but adds each token of the string to
1029      * the given list.  String literals (whether bytes or text) may come in
1030      * multiple adjacent tokens which are automatically concatenated, like in
1031      * C or Python.
1032      */
consumeByteString(List<ByteString> list)1033     private void consumeByteString(List<ByteString> list)
1034         throws ParseException {
1035       final char quote = currentToken.length() > 0
1036           ? currentToken.charAt(0)
1037           : '\0';
1038       if (quote != '\"' && quote != '\'') {
1039         throw parseException("Expected string.");
1040       }
1041 
1042       if (currentToken.length() < 2
1043           || currentToken.charAt(currentToken.length() - 1) != quote) {
1044         throw parseException("String missing ending quote.");
1045       }
1046 
1047       try {
1048         final String escaped =
1049             currentToken.substring(1, currentToken.length() - 1);
1050         final ByteString result = unescapeBytes(escaped);
1051         nextToken();
1052         list.add(result);
1053       } catch (InvalidEscapeSequenceException e) {
1054         throw parseException(e.getMessage());
1055       }
1056     }
1057 
1058     /**
1059      * Returns a {@link ParseException} with the current line and column
1060      * numbers in the description, suitable for throwing.
1061      */
parseException(final String description)1062     public ParseException parseException(final String description) {
1063       // Note:  People generally prefer one-based line and column numbers.
1064       return new ParseException(
1065         line + 1, column + 1, description);
1066     }
1067 
1068     /**
1069      * Returns a {@link ParseException} with the line and column numbers of
1070      * the previous token in the description, suitable for throwing.
1071      */
parseExceptionPreviousToken( final String description)1072     public ParseException parseExceptionPreviousToken(
1073         final String description) {
1074       // Note:  People generally prefer one-based line and column numbers.
1075       return new ParseException(
1076         previousLine + 1, previousColumn + 1, description);
1077     }
1078 
1079     /**
1080      * Constructs an appropriate {@link ParseException} for the given
1081      * {@code NumberFormatException} when trying to parse an integer.
1082      */
integerParseException( final NumberFormatException e)1083     private ParseException integerParseException(
1084         final NumberFormatException e) {
1085       return parseException("Couldn't parse integer: " + e.getMessage());
1086     }
1087 
1088     /**
1089      * Constructs an appropriate {@link ParseException} for the given
1090      * {@code NumberFormatException} when trying to parse a float or double.
1091      */
floatParseException(final NumberFormatException e)1092     private ParseException floatParseException(final NumberFormatException e) {
1093       return parseException("Couldn't parse number: " + e.getMessage());
1094     }
1095 
1096     /**
1097      * Returns a {@link UnknownFieldParseException} with the line and column
1098      * numbers of the previous token in the description, and the unknown field
1099      * name, suitable for throwing.
1100      */
unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1101     public UnknownFieldParseException unknownFieldParseExceptionPreviousToken(
1102         final String unknownField, final String description) {
1103       // Note:  People generally prefer one-based line and column numbers.
1104       return new UnknownFieldParseException(
1105         previousLine + 1, previousColumn + 1, unknownField, description);
1106     }
1107   }
1108 
1109   /** Thrown when parsing an invalid text format message. */
1110   public static class ParseException extends IOException {
1111     private static final long serialVersionUID = 3196188060225107702L;
1112 
1113     private final int line;
1114     private final int column;
1115 
1116     /** Create a new instance, with -1 as the line and column numbers. */
ParseException(final String message)1117     public ParseException(final String message) {
1118       this(-1, -1, message);
1119     }
1120 
1121     /**
1122      * Create a new instance
1123      *
1124      * @param line the line number where the parse error occurred,
1125      * using 1-offset.
1126      * @param column the column number where the parser error occurred,
1127      * using 1-offset.
1128      */
ParseException(final int line, final int column, final String message)1129     public ParseException(final int line, final int column,
1130         final String message) {
1131       super(Integer.toString(line) + ":" + column + ": " + message);
1132       this.line = line;
1133       this.column = column;
1134     }
1135 
1136     /**
1137      * Return the line where the parse exception occurred, or -1 when
1138      * none is provided. The value is specified as 1-offset, so the first
1139      * line is line 1.
1140      */
getLine()1141     public int getLine() {
1142       return line;
1143     }
1144 
1145     /**
1146      * Return the column where the parse exception occurred, or -1 when
1147      * none is provided. The value is specified as 1-offset, so the first
1148      * line is line 1.
1149      */
getColumn()1150     public int getColumn() {
1151       return column;
1152     }
1153   }
1154 
  /**
   * Thrown when encountering an unknown field while parsing
   * a text format message.  In addition to the position carried by
   * {@link ParseException}, it records the name of the offending field.
   */
  public static class UnknownFieldParseException extends ParseException {
    // Name of the field that was not recognized; "" when not supplied.
    private final String unknownField;

    /**
     * Create a new instance, with -1 as the line and column numbers, and an
     * empty unknown field name.
     *
     * @param message the error description
     */
    public UnknownFieldParseException(final String message) {
      this(-1, -1, "", message);
    }

    /**
     * Create a new instance for an unknown field found at a known position.
     *
     * @param line the line number where the parse error occurred,
     * using 1-offset.
     * @param column the column number where the parse error occurred,
     * using 1-offset.
     * @param unknownField the name of the unknown field found while parsing.
     * @param message the error description
     */
    public UnknownFieldParseException(final int line, final int column,
        final String unknownField, final String message) {
      super(line, column, message);
      this.unknownField = unknownField;
    }

    /**
     * Return the name of the unknown field encountered while parsing the
     * protocol buffer string.
     */
    public String getUnknownField() {
      return unknownField;
    }
  }
1193 
1194   private static final Parser PARSER = Parser.newBuilder().build();
1195 
  /**
   * Return a {@link Parser} instance which can parse text-format
   * messages. The returned instance is thread-safe.  Every call returns the
   * same shared instance, built from a {@link Parser.Builder} with its
   * default settings.
   */
  public static Parser getParser() {
    return PARSER;
  }
1203 
  /**
   * Parse a text-format message from {@code input} and merge the contents
   * into {@code builder}, using the shared default parser.
   *
   * @param input the source of the text-format message
   * @param builder the builder that receives the parsed fields
   * @throws IOException if the delegated {@link Parser#merge} call fails;
   *     parse errors are reported as {@link ParseException}, a subclass
   */
  public static void merge(final Readable input,
                           final Message.Builder builder)
                           throws IOException {
    PARSER.merge(input, builder);
  }
1213 
  /**
   * Parse a text-format message from {@code input} and merge the contents
   * into {@code builder}, using the shared default parser.
   *
   * @param input the text of the text-format message
   * @param builder the builder that receives the parsed fields
   * @throws ParseException if the delegated {@link Parser#merge} call
   *     rejects {@code input}
   */
  public static void merge(final CharSequence input,
                           final Message.Builder builder)
                           throws ParseException {
    PARSER.merge(input, builder);
  }
1223 
  /**
   * Parse a text-format message from {@code input} and merge the contents
   * into {@code builder}, using the shared default parser.  Extensions will
   * be recognized if they are registered in {@code extensionRegistry}.
   *
   * @param input the source of the text-format message
   * @param extensionRegistry the registry consulted for extension names
   * @param builder the builder that receives the parsed fields
   * @throws IOException if the delegated {@link Parser#merge} call fails;
   *     parse errors are reported as {@link ParseException}, a subclass
   */
  public static void merge(final Readable input,
                           final ExtensionRegistry extensionRegistry,
                           final Message.Builder builder)
                           throws IOException {
    PARSER.merge(input, extensionRegistry, builder);
  }
1235 
1236 
  /**
   * Parse a text-format message from {@code input} and merge the contents
   * into {@code builder}, using the shared default parser.  Extensions will
   * be recognized if they are registered in {@code extensionRegistry}.
   *
   * @param input the text of the text-format message
   * @param extensionRegistry the registry consulted for extension names
   * @param builder the builder that receives the parsed fields
   * @throws ParseException if the delegated {@link Parser#merge} call
   *     rejects {@code input}
   */
  public static void merge(final CharSequence input,
                           final ExtensionRegistry extensionRegistry,
                           final Message.Builder builder)
                           throws ParseException {
    PARSER.merge(input, extensionRegistry, builder);
  }
1248 
1249 
1250   /**
1251    * Parser for text-format proto2 instances. This class is thread-safe.
1252    * The implementation largely follows google/protobuf/text_format.cc.
1253    *
1254    * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or
1255    * {@link Builder} to control the parser behavior.
1256    */
1257   public static class Parser {
    /**
     * Determines if repeated values for non-repeated fields and
     * oneofs are permitted. For example, given required/optional field "foo"
     * and a oneof containing "baz" and "qux", each of the following counts
     * as an overwrite:
     * <ul>
     * <li>"foo: 1 foo: 2"
     * <li>"baz: 1 qux: 2"
     * <li>merging "foo: 2" into a proto in which foo is already set, or
     * <li>merging "qux: 2" into a proto in which baz is already set.
     * </ul>
     */
    public enum SingularOverwritePolicy {
      /** The last value is retained. */
      ALLOW_SINGULAR_OVERWRITES,
      /** An error is issued. */
      FORBID_SINGULAR_OVERWRITES
    }
1275 
1276     private final boolean allowUnknownFields;
1277     private final SingularOverwritePolicy singularOverwritePolicy;
1278     private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
1279 
    /**
     * Creates a parser with the given settings.  Private: instances are
     * obtained through {@link Builder#build()}.
     *
     * @param allowUnknownFields whether unknown fields are logged as a
     *     warning instead of raising a {@link ParseException}
     * @param singularOverwritePolicy how repeated values for singular fields
     *     are treated
     * @param parseInfoTreeBuilder if non-null, receives the location of each
     *     parsed field
     */
    private Parser(
        boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy,
        TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
      this.allowUnknownFields = allowUnknownFields;
      this.singularOverwritePolicy = singularOverwritePolicy;
      this.parseInfoTreeBuilder = parseInfoTreeBuilder;
    }
1287 
    /**
     * Returns a new instance of {@link Builder}, initialized with the
     * builder's default settings.
     */
    public static Builder newBuilder() {
      return new Builder();
    }
1294 
    /**
     * Builder that can be used to obtain new instances of {@link Parser}.
     * By default singular overwrites are allowed and no parse info tree is
     * recorded.
     */
    public static class Builder {
      // No setter for this flag is declared in this class, so it keeps its
      // initial value in every parser built here.
      private boolean allowUnknownFields = false;
      private SingularOverwritePolicy singularOverwritePolicy =
          SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
      private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;


      /**
       * Sets parser behavior when a non-repeated field appears more than once.
       *
       * @param p the policy to apply
       * @return this builder, for chaining
       */
      public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
        this.singularOverwritePolicy = p;
        return this;
      }

      /**
       * Sets the tree builder that the parser fills with the location of each
       * field it parses.  Pass {@code null} (the default) to record nothing.
       *
       * @param parseInfoTreeBuilder the tree builder to populate, or null
       * @return this builder, for chaining
       */
      public Builder setParseInfoTreeBuilder(
          TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
        this.parseInfoTreeBuilder = parseInfoTreeBuilder;
        return this;
      }

      /**
       * Creates a {@link Parser} configured with this builder's current
       * settings.
       */
      public Parser build() {
        return new Parser(
            allowUnknownFields, singularOverwritePolicy, parseInfoTreeBuilder);
      }
    }
1324 
    /**
     * Parse a text-format message from {@code input} and merge the contents
     * into {@code builder}.  An empty extension registry is used, so no
     * extensions are looked up in a caller-supplied registry.
     *
     * @param input the source of the text-format message
     * @param builder the builder that receives the parsed fields
     * @throws IOException if reading {@code input} fails; parse errors are
     *     reported as {@link ParseException}, a subclass
     */
    public void merge(final Readable input,
                      final Message.Builder builder)
                      throws IOException {
      merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
    }
1334 
    /**
     * Parse a text-format message from {@code input} and merge the contents
     * into {@code builder}.  An empty extension registry is used, so no
     * extensions are looked up in a caller-supplied registry.
     *
     * @param input the text of the text-format message
     * @param builder the builder that receives the parsed fields
     * @throws ParseException if {@code input} is not a valid message
     */
    public void merge(final CharSequence input,
                      final Message.Builder builder)
                      throws ParseException {
      merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
    }
1344 
    /**
     * Parse a text-format message from {@code input} and merge the contents
     * into {@code builder}.  Extensions will be recognized if they are
     * registered in {@code extensionRegistry}.
     *
     * <p>The whole of {@code input} is read into memory before parsing
     * starts.
     *
     * @param input the source of the text-format message
     * @param extensionRegistry the registry consulted for extension names
     * @param builder the builder that receives the parsed fields
     * @throws IOException if reading {@code input} fails; parse errors are
     *     reported as {@link ParseException}, a subclass
     */
    public void merge(final Readable input,
                      final ExtensionRegistry extensionRegistry,
                      final Message.Builder builder)
                      throws IOException {
      // Read the entire input to a String then parse that.

      // If StreamTokenizer were more capable, or if there were a kind of
      // Reader that could read in chunks that match some particular regex,
      // or if we wanted to write a custom Reader to tokenize our stream, then
      // we would not have to read to one big String.  None of these is the
      // case, so the input is buffered in full.

      merge(toStringBuilder(input), extensionRegistry, builder);
    }
1364 
1365 
1366     private static final int BUFFER_SIZE = 4096;
1367 
1368     // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
1369     // overhead is worthwhile
toStringBuilder(final Readable input)1370     private static StringBuilder toStringBuilder(final Readable input)
1371         throws IOException {
1372       final StringBuilder text = new StringBuilder();
1373       final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
1374       while (true) {
1375         final int n = input.read(buffer);
1376         if (n == -1) {
1377           break;
1378         }
1379         buffer.flip();
1380         text.append(buffer, 0, n);
1381       }
1382       return text;
1383     }
1384 
1385     // Check both unknown fields and unknown extensions and log warming messages
1386     // or throw exceptions according to the flag.
checkUnknownFields(final List<String> unknownFields)1387     private void checkUnknownFields(final List<String> unknownFields)
1388         throws ParseException {
1389       if (unknownFields.isEmpty()) {
1390         return;
1391       }
1392 
1393       StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:");
1394       for (String field : unknownFields) {
1395         msg.append('\n').append(field);
1396       }
1397 
1398       if (allowUnknownFields) {
1399           logger.warning(msg.toString());
1400       } else {
1401         String[] lineColumn = unknownFields.get(0).split(":");
1402         throw new ParseException(Integer.valueOf(lineColumn[0]),
1403             Integer.valueOf(lineColumn[1]), msg.toString());
1404       }
1405     }
1406 
    /**
     * Parse a text-format message from {@code input} and merge the contents
     * into {@code builder}.  Extensions will be recognized if they are
     * registered in {@code extensionRegistry}.
     *
     * <p>Fields are merged one at a time until the tokenizer reaches the end
     * of the input.  Unknown fields are collected along the way and reported
     * only after the whole input has been processed.
     *
     * @param input the text of the text-format message
     * @param extensionRegistry the registry consulted for extension names
     * @param builder the builder that receives the parsed fields
     * @throws ParseException if the input is malformed, or if unknown fields
     *     were found and this parser does not allow them
     */
    public void merge(final CharSequence input,
                      final ExtensionRegistry extensionRegistry,
                      final Message.Builder builder)
                      throws ParseException {
      final Tokenizer tokenizer = new Tokenizer(input);
      MessageReflection.BuilderAdapter target =
          new MessageReflection.BuilderAdapter(builder);

      // Filled by mergeField with one "line:column:\tname" entry per unknown
      // field or extension.
      List<String> unknownFields = new ArrayList<String>();

      while (!tokenizer.atEnd()) {
        mergeField(tokenizer, extensionRegistry, target, unknownFields);
      }

      checkUnknownFields(unknownFields);
    }
1428 
1429 
    /**
     * Parse a single field from {@code tokenizer} and merge it into
     * {@code target}, recording locations in this parser's own
     * {@code parseInfoTreeBuilder} (which may be null).
     *
     * @param tokenizer the token source, positioned at the start of a field
     * @param extensionRegistry the registry consulted for extension names
     * @param target the message being populated
     * @param unknownFields receives an entry for each unknown field found
     * @throws ParseException if the field is malformed
     */
    private void mergeField(final Tokenizer tokenizer,
                            final ExtensionRegistry extensionRegistry,
                            final MessageReflection.MergeTarget target,
                            List<String> unknownFields)
                            throws ParseException {
      mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder,
                 unknownFields);
    }
1442 
    /**
     * Parse a single field from {@code tokenizer} and merge it into
     * {@code target}.
     *
     * <p>The field is named either by a bracketed extension name or by a
     * plain identifier.  A name that cannot be resolved is recorded in
     * {@code unknownFields} and its value is skipped.  Otherwise the value(s)
     * are parsed, the field's start location is recorded in
     * {@code parseTreeBuilder} when one is given, and one optional trailing
     * {@code ";"} or {@code ","} is consumed.
     *
     * @param tokenizer the token source, positioned at the start of a field
     * @param extensionRegistry the registry consulted for extension names
     * @param target the message being populated
     * @param parseTreeBuilder receives field locations; may be null
     * @param unknownFields receives a "line:column:\tname" entry for each
     *     unknown field or extension
     * @throws ParseException if the field is malformed, or if a known
     *     extension does not extend the target's message type
     */
    private void mergeField(final Tokenizer tokenizer,
                            final ExtensionRegistry extensionRegistry,
                            final MessageReflection.MergeTarget target,
                            TextFormatParseInfoTree.Builder parseTreeBuilder,
                            List<String> unknownFields)
                            throws ParseException {
      FieldDescriptor field = null;
      // Position of the field's first token, captured before anything is
      // consumed; used for the parse info tree below.
      int startLine = tokenizer.getLine();
      int startColumn = tokenizer.getColumn();
      final Descriptor type = target.getDescriptorForType();
      ExtensionRegistry.ExtensionInfo extension = null;

      if (tokenizer.tryConsume("[")) {
        // An extension.
        final StringBuilder name =
            new StringBuilder(tokenizer.consumeIdentifier());
        while (tokenizer.tryConsume(".")) {
          name.append('.');
          name.append(tokenizer.consumeIdentifier());
        }

        extension = target.findExtensionByName(
            extensionRegistry, name.toString());

        if (extension == null) {
          // Unknown extension: remember it; field stays null so the value
          // is skipped below.
          unknownFields.add((tokenizer.getPreviousLine() + 1) + ":" +
              (tokenizer.getPreviousColumn() + 1) + ":\t" +
              type.getFullName() + ".[" + name + "]");
        } else {
          if (extension.descriptor.getContainingType() != type) {
            throw tokenizer.parseExceptionPreviousToken(
              "Extension \"" + name + "\" does not extend message type \""
              + type.getFullName() + "\".");
          }
          field = extension.descriptor;
        }

        tokenizer.consume("]");
      } else {
        final String name = tokenizer.consumeIdentifier();
        field = type.findFieldByName(name);

        // Group names are expected to be capitalized as they appear in the
        // .proto file, which actually matches their type names, not their field
        // names.
        if (field == null) {
          // Explicitly specify US locale so that this code does not break when
          // executing in Turkey.
          final String lowerName = name.toLowerCase(Locale.US);
          field = type.findFieldByName(lowerName);
          // If the case-insensitive match worked but the field is NOT a group,
          // discard the match: only a group may be referred to by a name
          // that differs from its field name.
          if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
            field = null;
          }
        }
        // Again, special-case group names as described above.
        if (field != null && field.getType() == FieldDescriptor.Type.GROUP
            && !field.getMessageType().getName().equals(name)) {
          field = null;
        }

        if (field == null) {
          unknownFields.add((tokenizer.getPreviousLine() + 1) + ":" +
              (tokenizer.getPreviousColumn() + 1) + ":\t" +
              type.getFullName() + "." + name);
        }
      }

      // Skips unknown fields.
      if (field == null) {
        // Try to guess the type of this field.
        // If this field is not a message, there should be a ":" between the
        // field name and the field value and also the field value should not
        // start with "{" or "<" which indicates the beginning of a message body.
        // If there is no ":" or there is a "{" or "<" after ":", this field has
        // to be a message or the input is ill-formed.
        if (tokenizer.tryConsume(":")
            && !tokenizer.lookingAt("{")
            && !tokenizer.lookingAt("<")) {
          skipFieldValue(tokenizer);
        } else {
          skipFieldMessage(tokenizer);
        }
        // Note: this path returns without consuming a trailing ";" or ",".
        return;
      }

      // Handle potential ':'.
      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
        tokenizer.tryConsume(":");  // optional
        if (parseTreeBuilder != null) {
          // Locations inside the sub-message go into a child tree.
          TextFormatParseInfoTree.Builder childParseTreeBuilder =
              parseTreeBuilder.getBuilderForSubMessageField(field);
          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
              childParseTreeBuilder, unknownFields);
        } else {
          // parseTreeBuilder is null here, so null is passed on.
          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
              parseTreeBuilder, unknownFields);
        }
      } else {
        tokenizer.consume(":");  // required
        consumeFieldValues(tokenizer, extensionRegistry, target, field,
            extension, parseTreeBuilder, unknownFields);
      }

      if (parseTreeBuilder != null) {
        parseTreeBuilder.setLocation(
            field, TextFormatParseLocation.create(startLine, startColumn));
      }

      // For historical reasons, fields may optionally be separated by commas or
      // semicolons.
      if (!tokenizer.tryConsume(";")) {
        tokenizer.tryConsume(",");
      }
    }
1562 
1563     /**
1564      * Parse a one or more field values from {@code tokenizer} and merge it into
1565      * {@code builder}.
1566      */
consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<String> unknownFields)1567     private void consumeFieldValues(
1568         final Tokenizer tokenizer,
1569         final ExtensionRegistry extensionRegistry,
1570         final MessageReflection.MergeTarget target,
1571         final FieldDescriptor field,
1572         final ExtensionRegistry.ExtensionInfo extension,
1573         final TextFormatParseInfoTree.Builder parseTreeBuilder,
1574         List<String> unknownFields)
1575         throws ParseException {
1576       // Support specifying repeated field values as a comma-separated list.
1577       // Ex."foo: [1, 2, 3]"
1578       if (field.isRepeated() && tokenizer.tryConsume("[")) {
1579         while (true) {
1580           consumeFieldValue(tokenizer, extensionRegistry, target, field, extension,
1581               parseTreeBuilder, unknownFields);
1582           if (tokenizer.tryConsume("]")) {
1583             // End of list.
1584             break;
1585           }
1586           tokenizer.consume(",");
1587         }
1588       } else {
1589         consumeFieldValue(tokenizer, extensionRegistry, target, field,
1590             extension, parseTreeBuilder, unknownFields);
1591       }
1592     }
1593 
1594     /**
1595      * Parse a single field value from {@code tokenizer} and merge it into
1596      * {@code builder}.
1597      */
consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<String> unknownFields)1598     private void consumeFieldValue(
1599         final Tokenizer tokenizer,
1600         final ExtensionRegistry extensionRegistry,
1601         final MessageReflection.MergeTarget target,
1602         final FieldDescriptor field,
1603         final ExtensionRegistry.ExtensionInfo extension,
1604         final TextFormatParseInfoTree.Builder parseTreeBuilder,
1605         List<String> unknownFields)
1606         throws ParseException {
1607       Object value = null;
1608 
1609       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1610         final String endToken;
1611         if (tokenizer.tryConsume("<")) {
1612           endToken = ">";
1613         } else {
1614           tokenizer.consume("{");
1615           endToken = "}";
1616         }
1617 
1618         final MessageReflection.MergeTarget subField;
1619         subField = target.newMergeTargetForField(field,
1620             (extension == null) ? null : extension.defaultInstance);
1621 
1622         while (!tokenizer.tryConsume(endToken)) {
1623           if (tokenizer.atEnd()) {
1624             throw tokenizer.parseException(
1625               "Expected \"" + endToken + "\".");
1626           }
1627           mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder,
1628               unknownFields);
1629         }
1630 
1631         value = subField.finish();
1632 
1633       } else {
1634         switch (field.getType()) {
1635           case INT32:
1636           case SINT32:
1637           case SFIXED32:
1638             value = tokenizer.consumeInt32();
1639             break;
1640 
1641           case INT64:
1642           case SINT64:
1643           case SFIXED64:
1644             value = tokenizer.consumeInt64();
1645             break;
1646 
1647           case UINT32:
1648           case FIXED32:
1649             value = tokenizer.consumeUInt32();
1650             break;
1651 
1652           case UINT64:
1653           case FIXED64:
1654             value = tokenizer.consumeUInt64();
1655             break;
1656 
1657           case FLOAT:
1658             value = tokenizer.consumeFloat();
1659             break;
1660 
1661           case DOUBLE:
1662             value = tokenizer.consumeDouble();
1663             break;
1664 
1665           case BOOL:
1666             value = tokenizer.consumeBoolean();
1667             break;
1668 
1669           case STRING:
1670             value = tokenizer.consumeString();
1671             break;
1672 
1673           case BYTES:
1674             value = tokenizer.consumeByteString();
1675             break;
1676 
1677           case ENUM:
1678             final EnumDescriptor enumType = field.getEnumType();
1679 
1680             if (tokenizer.lookingAtInteger()) {
1681               final int number = tokenizer.consumeInt32();
1682               value = enumType.findValueByNumber(number);
1683               if (value == null) {
1684                 throw tokenizer.parseExceptionPreviousToken(
1685                   "Enum type \"" + enumType.getFullName()
1686                   + "\" has no value with number " + number + '.');
1687               }
1688             } else {
1689               final String id = tokenizer.consumeIdentifier();
1690               value = enumType.findValueByName(id);
1691               if (value == null) {
1692                 throw tokenizer.parseExceptionPreviousToken(
1693                   "Enum type \"" + enumType.getFullName()
1694                   + "\" has no value named \"" + id + "\".");
1695               }
1696             }
1697 
1698             break;
1699 
1700           case MESSAGE:
1701           case GROUP:
1702             throw new RuntimeException("Can't get here.");
1703         }
1704       }
1705 
1706       if (field.isRepeated()) {
1707         target.addRepeatedField(field, value);
1708       } else if ((singularOverwritePolicy
1709               == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
1710           && target.hasField(field)) {
1711         throw tokenizer.parseExceptionPreviousToken("Non-repeated field \""
1712             + field.getFullName() + "\" cannot be overwritten.");
1713       } else if ((singularOverwritePolicy
1714               == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
1715           && field.getContainingOneof() != null
1716           && target.hasOneof(field.getContainingOneof())) {
1717         Descriptors.OneofDescriptor oneof = field.getContainingOneof();
1718         throw tokenizer.parseExceptionPreviousToken("Field \""
1719             + field.getFullName() + "\" is specified along with field \""
1720             + target.getOneofFieldDescriptor(oneof).getFullName()
1721             + "\", another member of oneof \"" + oneof.getName() + "\".");
1722       } else {
1723         target.setField(field, value);
1724       }
1725     }
1726 
1727     /**
1728      * Skips the next field including the field's name and value.
1729      */
skipField(Tokenizer tokenizer)1730     private void skipField(Tokenizer tokenizer) throws ParseException {
1731       if (tokenizer.tryConsume("[")) {
1732         // Extension name.
1733         do {
1734           tokenizer.consumeIdentifier();
1735         } while (tokenizer.tryConsume("."));
1736         tokenizer.consume("]");
1737       } else {
1738         tokenizer.consumeIdentifier();
1739       }
1740 
1741       // Try to guess the type of this field.
1742       // If this field is not a message, there should be a ":" between the
1743       // field name and the field value and also the field value should not
1744       // start with "{" or "<" which indicates the beginning of a message body.
1745       // If there is no ":" or there is a "{" or "<" after ":", this field has
1746       // to be a message or the input is ill-formed.
1747       if (tokenizer.tryConsume(":")
1748           && !tokenizer.lookingAt("<")
1749           && !tokenizer.lookingAt("{")) {
1750         skipFieldValue(tokenizer);
1751       } else {
1752         skipFieldMessage(tokenizer);
1753       }
1754       // For historical reasons, fields may optionally be separated by commas or
1755       // semicolons.
1756       if (!tokenizer.tryConsume(";")) {
1757         tokenizer.tryConsume(",");
1758       }
1759     }
1760 
1761     /**
1762      * Skips the whole body of a message including the beginning delimiter and
1763      * the ending delimiter.
1764      */
skipFieldMessage(Tokenizer tokenizer)1765     private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
1766       final String delimiter;
1767       if (tokenizer.tryConsume("<")) {
1768         delimiter = ">";
1769       } else {
1770         tokenizer.consume("{");
1771         delimiter = "}";
1772       }
1773       while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
1774         skipField(tokenizer);
1775       }
1776       tokenizer.consume(delimiter);
1777     }
1778 
1779     /**
1780      * Skips a field value.
1781      */
skipFieldValue(Tokenizer tokenizer)1782     private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
1783       if (tokenizer.tryConsumeString()) {
1784         while (tokenizer.tryConsumeString()) {}
1785         return;
1786       }
1787       if (!tokenizer.tryConsumeIdentifier()   // includes enum & boolean
1788           && !tokenizer.tryConsumeInt64()     // includes int32
1789           && !tokenizer.tryConsumeUInt64()    // includes uint32
1790           && !tokenizer.tryConsumeDouble()
1791           && !tokenizer.tryConsumeFloat()) {
1792         throw tokenizer.parseException(
1793             "Invalid field value: " + tokenizer.currentToken);
1794       }
1795     }
1796   }
1797 
1798   // =================================================================
1799   // Utility functions
1800   //
1801   // Some of these methods are package-private because Descriptors.java uses
1802   // them.
1803 
1804   /**
1805    * Escapes bytes in the format used in protocol buffer text format, which
1806    * is the same as the format used for C string literals.  All bytes
1807    * that are not printable 7-bit ASCII characters are escaped, as well as
1808    * backslash, single-quote, and double-quote characters.  Characters for
1809    * which no defined short-hand escape sequence is defined will be escaped
1810    * using 3-digit octal sequences.
1811    */
escapeBytes(ByteString input)1812   public static String escapeBytes(ByteString input) {
1813     return TextFormatEscaper.escapeBytes(input);
1814   }
1815 
1816   /**
1817    * Like {@link #escapeBytes(ByteString)}, but used for byte array.
1818    */
escapeBytes(byte[] input)1819   public static String escapeBytes(byte[] input) {
1820     return TextFormatEscaper.escapeBytes(input);
1821   }
1822 
1823   /**
1824    * Un-escape a byte sequence as escaped using
1825    * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
1826    * "\x") are also recognized.
1827    */
unescapeBytes(final CharSequence charString)1828   public static ByteString unescapeBytes(final CharSequence charString)
1829       throws InvalidEscapeSequenceException {
1830     // First convert the Java character sequence to UTF-8 bytes.
1831     ByteString input = ByteString.copyFromUtf8(charString.toString());
1832     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
1833     // escapes can all be expressed with ASCII characters, so it is safe to
1834     // operate on bytes here.
1835     //
1836     // Unescaping the input byte array will result in a byte sequence that's no
1837     // longer than the input.  That's because each escape sequence is between
1838     // two and four bytes long and stands for a single byte.
1839     final byte[] result = new byte[input.size()];
1840     int pos = 0;
1841     for (int i = 0; i < input.size(); i++) {
1842       byte c = input.byteAt(i);
1843       if (c == '\\') {
1844         if (i + 1 < input.size()) {
1845           ++i;
1846           c = input.byteAt(i);
1847           if (isOctal(c)) {
1848             // Octal escape.
1849             int code = digitValue(c);
1850             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1851               ++i;
1852               code = code * 8 + digitValue(input.byteAt(i));
1853             }
1854             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1855               ++i;
1856               code = code * 8 + digitValue(input.byteAt(i));
1857             }
1858             // TODO: Check that 0 <= code && code <= 0xFF.
1859             result[pos++] = (byte) code;
1860           } else {
1861             switch (c) {
1862               case 'a' : result[pos++] = 0x07; break;
1863               case 'b' : result[pos++] = '\b'; break;
1864               case 'f' : result[pos++] = '\f'; break;
1865               case 'n' : result[pos++] = '\n'; break;
1866               case 'r' : result[pos++] = '\r'; break;
1867               case 't' : result[pos++] = '\t'; break;
1868               case 'v' : result[pos++] = 0x0b; break;
1869               case '\\': result[pos++] = '\\'; break;
1870               case '\'': result[pos++] = '\''; break;
1871               case '"' : result[pos++] = '\"'; break;
1872 
1873               case 'x':
1874                 // hex escape
1875                 int code = 0;
1876                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1877                   ++i;
1878                   code = digitValue(input.byteAt(i));
1879                 } else {
1880                   throw new InvalidEscapeSequenceException(
1881                       "Invalid escape sequence: '\\x' with no digits");
1882                 }
1883                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1884                   ++i;
1885                   code = code * 16 + digitValue(input.byteAt(i));
1886                 }
1887                 result[pos++] = (byte) code;
1888                 break;
1889 
1890               default:
1891                 throw new InvalidEscapeSequenceException(
1892                     "Invalid escape sequence: '\\" + (char) c + '\'');
1893             }
1894           }
1895         } else {
1896           throw new InvalidEscapeSequenceException(
1897               "Invalid escape sequence: '\\' at end of string.");
1898         }
1899       } else {
1900         result[pos++] = c;
1901       }
1902     }
1903 
1904     return result.length == pos
1905         ? ByteString.wrap(result)  // This reference has not been out of our control.
1906         : ByteString.copyFrom(result, 0, pos);
1907   }
1908 
1909   /**
1910    * Thrown by {@link TextFormat#unescapeBytes} and
1911    * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
1912    */
1913   public static class InvalidEscapeSequenceException extends IOException {
1914     private static final long serialVersionUID = -8164033650142593304L;
1915 
InvalidEscapeSequenceException(final String description)1916     InvalidEscapeSequenceException(final String description) {
1917       super(description);
1918     }
1919   }
1920 
1921   /**
1922    * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
1923    * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
1924    * individually as a 3-digit octal escape.  Yes, it's weird.
1925    */
escapeText(final String input)1926   static String escapeText(final String input) {
1927     return escapeBytes(ByteString.copyFromUtf8(input));
1928   }
1929 
1930   /**
1931    * Escape double quotes and backslashes in a String for unicode output of a message.
1932    */
escapeDoubleQuotesAndBackslashes(final String input)1933   public static String escapeDoubleQuotesAndBackslashes(final String input) {
1934     return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
1935   }
1936 
1937   /**
1938    * Un-escape a text string as escaped using {@link #escapeText(String)}.
1939    * Two-digit hex escapes (starting with "\x") are also recognized.
1940    */
unescapeText(final String input)1941   static String unescapeText(final String input)
1942                              throws InvalidEscapeSequenceException {
1943     return unescapeBytes(input).toStringUtf8();
1944   }
1945 
1946   /** Is this an octal digit? */
isOctal(final byte c)1947   private static boolean isOctal(final byte c) {
1948     return '0' <= c && c <= '7';
1949   }
1950 
1951   /** Is this a hex digit? */
isHex(final byte c)1952   private static boolean isHex(final byte c) {
1953     return ('0' <= c && c <= '9')
1954         || ('a' <= c && c <= 'f')
1955         || ('A' <= c && c <= 'F');
1956   }
1957 
1958   /**
1959    * Interpret a character as a digit (in any base up to 36) and return the
1960    * numeric value.  This is like {@code Character.digit()} but we don't accept
1961    * non-ASCII digits.
1962    */
digitValue(final byte c)1963   private static int digitValue(final byte c) {
1964     if ('0' <= c && c <= '9') {
1965       return c - '0';
1966     } else if ('a' <= c && c <= 'z') {
1967       return c - 'a' + 10;
1968     } else {
1969       return c - 'A' + 10;
1970     }
1971   }
1972 
1973   /**
1974    * Parse a 32-bit signed integer from the text.  Unlike the Java standard
1975    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1976    * and "0" to signify hexadecimal and octal numbers, respectively.
1977    */
parseInt32(final String text)1978   static int parseInt32(final String text) throws NumberFormatException {
1979     return (int) parseInteger(text, true, false);
1980   }
1981 
1982   /**
1983    * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
1984    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1985    * and "0" to signify hexadecimal and octal numbers, respectively.  The
1986    * result is coerced to a (signed) {@code int} when returned since Java has
1987    * no unsigned integer type.
1988    */
parseUInt32(final String text)1989   static int parseUInt32(final String text) throws NumberFormatException {
1990     return (int) parseInteger(text, false, false);
1991   }
1992 
1993   /**
1994    * Parse a 64-bit signed integer from the text.  Unlike the Java standard
1995    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1996    * and "0" to signify hexadecimal and octal numbers, respectively.
1997    */
parseInt64(final String text)1998   static long parseInt64(final String text) throws NumberFormatException {
1999     return parseInteger(text, true, true);
2000   }
2001 
2002   /**
2003    * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
2004    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
2005    * and "0" to signify hexadecimal and octal numbers, respectively.  The
2006    * result is coerced to a (signed) {@code long} when returned since Java has
2007    * no unsigned long type.
2008    */
parseUInt64(final String text)2009   static long parseUInt64(final String text) throws NumberFormatException {
2010     return parseInteger(text, false, true);
2011   }
2012 
parseInteger(final String text, final boolean isSigned, final boolean isLong)2013   private static long parseInteger(final String text,
2014                                    final boolean isSigned,
2015                                    final boolean isLong)
2016                                    throws NumberFormatException {
2017     int pos = 0;
2018 
2019     boolean negative = false;
2020     if (text.startsWith("-", pos)) {
2021       if (!isSigned) {
2022         throw new NumberFormatException("Number must be positive: " + text);
2023       }
2024       ++pos;
2025       negative = true;
2026     }
2027 
2028     int radix = 10;
2029     if (text.startsWith("0x", pos)) {
2030       pos += 2;
2031       radix = 16;
2032     } else if (text.startsWith("0", pos)) {
2033       radix = 8;
2034     }
2035 
2036     final String numberText = text.substring(pos);
2037 
2038     long result = 0;
2039     if (numberText.length() < 16) {
2040       // Can safely assume no overflow.
2041       result = Long.parseLong(numberText, radix);
2042       if (negative) {
2043         result = -result;
2044       }
2045 
2046       // Check bounds.
2047       // No need to check for 64-bit numbers since they'd have to be 16 chars
2048       // or longer to overflow.
2049       if (!isLong) {
2050         if (isSigned) {
2051           if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
2052             throw new NumberFormatException(
2053               "Number out of range for 32-bit signed integer: " + text);
2054           }
2055         } else {
2056           if (result >= (1L << 32) || result < 0) {
2057             throw new NumberFormatException(
2058               "Number out of range for 32-bit unsigned integer: " + text);
2059           }
2060         }
2061       }
2062     } else {
2063       BigInteger bigValue = new BigInteger(numberText, radix);
2064       if (negative) {
2065         bigValue = bigValue.negate();
2066       }
2067 
2068       // Check bounds.
2069       if (!isLong) {
2070         if (isSigned) {
2071           if (bigValue.bitLength() > 31) {
2072             throw new NumberFormatException(
2073               "Number out of range for 32-bit signed integer: " + text);
2074           }
2075         } else {
2076           if (bigValue.bitLength() > 32) {
2077             throw new NumberFormatException(
2078               "Number out of range for 32-bit unsigned integer: " + text);
2079           }
2080         }
2081       } else {
2082         if (isSigned) {
2083           if (bigValue.bitLength() > 63) {
2084             throw new NumberFormatException(
2085               "Number out of range for 64-bit signed integer: " + text);
2086           }
2087         } else {
2088           if (bigValue.bitLength() > 64) {
2089             throw new NumberFormatException(
2090               "Number out of range for 64-bit unsigned integer: " + text);
2091           }
2092         }
2093       }
2094 
2095       result = bigValue.longValue();
2096     }
2097 
2098     return result;
2099   }
2100 }
2101