1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import com.google.protobuf.Descriptors.Descriptor;
34 import com.google.protobuf.Descriptors.EnumDescriptor;
35 import com.google.protobuf.Descriptors.EnumValueDescriptor;
36 import com.google.protobuf.Descriptors.FieldDescriptor;
37 
38 import java.io.IOException;
39 import java.math.BigInteger;
40 import java.nio.CharBuffer;
41 import java.util.ArrayList;
42 import java.util.List;
43 import java.util.Locale;
44 import java.util.Map;
45 import java.util.logging.Logger;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48 
49 /**
50  * Provide text parsing and formatting support for proto2 instances.
51  * The implementation largely follows google/protobuf/text_format.cc.
52  *
53  * @author wenboz@google.com Wenbo Zhu
54  * @author kenton@google.com Kenton Varda
55  */
56 public final class TextFormat {
TextFormat()57   private TextFormat() {}
58 
59   private static final Logger logger =
60       Logger.getLogger(TextFormat.class.getName());
61 
62   private static final Printer DEFAULT_PRINTER = new Printer();
63   private static final Printer SINGLE_LINE_PRINTER =
64       (new Printer()).setSingleLineMode(true);
65   private static final Printer UNICODE_PRINTER =
66       (new Printer()).setEscapeNonAscii(false);
67 
68   /**
69    * Outputs a textual representation of the Protocol Message supplied into
70    * the parameter output. (This representation is the new version of the
71    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
72    */
print( final MessageOrBuilder message, final Appendable output)73   public static void print(
74       final MessageOrBuilder message, final Appendable output)
75       throws IOException {
76     DEFAULT_PRINTER.print(message, new TextGenerator(output));
77   }
78 
79   /** Outputs a textual representation of {@code fields} to {@code output}. */
print(final UnknownFieldSet fields, final Appendable output)80   public static void print(final UnknownFieldSet fields,
81                            final Appendable output)
82                            throws IOException {
83     DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
84   }
85 
86   /**
87    * Same as {@code print()}, except that non-ASCII characters are not
88    * escaped.
89    */
printUnicode( final MessageOrBuilder message, final Appendable output)90   public static void printUnicode(
91       final MessageOrBuilder message, final Appendable output)
92       throws IOException {
93     UNICODE_PRINTER.print(message, new TextGenerator(output));
94   }
95 
96   /**
97    * Same as {@code print()}, except that non-ASCII characters are not
98    * escaped.
99    */
printUnicode(final UnknownFieldSet fields, final Appendable output)100   public static void printUnicode(final UnknownFieldSet fields,
101                                   final Appendable output)
102                                   throws IOException {
103     UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(output));
104   }
105 
106   /**
107    * Generates a human readable form of this message, useful for debugging and
108    * other purposes, with no newline characters.
109    */
shortDebugString(final MessageOrBuilder message)110   public static String shortDebugString(final MessageOrBuilder message) {
111     try {
112       final StringBuilder sb = new StringBuilder();
113       SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
114       // Single line mode currently might have an extra space at the end.
115       return sb.toString().trim();
116     } catch (IOException e) {
117       throw new IllegalStateException(e);
118     }
119   }
120 
121   /**
122    * Generates a human readable form of the field, useful for debugging
123    * and other purposes, with no newline characters.
124    */
shortDebugString(final FieldDescriptor field, final Object value)125   public static String shortDebugString(final FieldDescriptor field,
126                                         final Object value) {
127     try {
128       final StringBuilder sb = new StringBuilder();
129       SINGLE_LINE_PRINTER.printField(field, value, new TextGenerator(sb));
130       return sb.toString().trim();
131     } catch (IOException e) {
132         throw new IllegalStateException(e);
133     }
134   }
135 
136   /**
137    * Generates a human readable form of the unknown fields, useful for debugging
138    * and other purposes, with no newline characters.
139    */
shortDebugString(final UnknownFieldSet fields)140   public static String shortDebugString(final UnknownFieldSet fields) {
141     try {
142       final StringBuilder sb = new StringBuilder();
143       SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
144       // Single line mode currently might have an extra space at the end.
145       return sb.toString().trim();
146     } catch (IOException e) {
147       throw new IllegalStateException(e);
148     }
149   }
150 
151   /**
152    * Like {@code print()}, but writes directly to a {@code String} and
153    * returns it.
154    */
printToString(final MessageOrBuilder message)155   public static String printToString(final MessageOrBuilder message) {
156     try {
157       final StringBuilder text = new StringBuilder();
158       print(message, text);
159       return text.toString();
160     } catch (IOException e) {
161       throw new IllegalStateException(e);
162     }
163   }
164 
165   /**
166    * Like {@code print()}, but writes directly to a {@code String} and
167    * returns it.
168    */
printToString(final UnknownFieldSet fields)169   public static String printToString(final UnknownFieldSet fields) {
170     try {
171       final StringBuilder text = new StringBuilder();
172       print(fields, text);
173       return text.toString();
174     } catch (IOException e) {
175       throw new IllegalStateException(e);
176     }
177   }
178 
179   /**
180    * Same as {@code printToString()}, except that non-ASCII characters
181    * in string type fields are not escaped in backslash+octals.
182    */
printToUnicodeString(final MessageOrBuilder message)183   public static String printToUnicodeString(final MessageOrBuilder message) {
184     try {
185       final StringBuilder text = new StringBuilder();
186       UNICODE_PRINTER.print(message, new TextGenerator(text));
187       return text.toString();
188     } catch (IOException e) {
189       throw new IllegalStateException(e);
190     }
191   }
192 
193   /**
194    * Same as {@code printToString()}, except that non-ASCII characters
195    * in string type fields are not escaped in backslash+octals.
196    */
printToUnicodeString(final UnknownFieldSet fields)197   public static String printToUnicodeString(final UnknownFieldSet fields) {
198     try {
199       final StringBuilder text = new StringBuilder();
200       UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
201       return text.toString();
202     } catch (IOException e) {
203       throw new IllegalStateException(e);
204     }
205   }
206 
printField(final FieldDescriptor field, final Object value, final Appendable output)207   public static void printField(final FieldDescriptor field,
208                                 final Object value,
209                                 final Appendable output)
210                                 throws IOException {
211     DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
212   }
213 
printFieldToString(final FieldDescriptor field, final Object value)214   public static String printFieldToString(final FieldDescriptor field,
215                                           final Object value) {
216     try {
217       final StringBuilder text = new StringBuilder();
218       printField(field, value, text);
219       return text.toString();
220     } catch (IOException e) {
221       throw new IllegalStateException(e);
222     }
223   }
224 
225   /**
226    * Outputs a textual representation of the value of given field value.
227    *
228    * @param field the descriptor of the field
229    * @param value the value of the field
230    * @param output the output to which to append the formatted value
231    * @throws ClassCastException if the value is not appropriate for the
232    *     given field descriptor
233    * @throws IOException if there is an exception writing to the output
234    */
printFieldValue(final FieldDescriptor field, final Object value, final Appendable output)235   public static void printFieldValue(final FieldDescriptor field,
236                                      final Object value,
237                                      final Appendable output)
238                                      throws IOException {
239     DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
240   }
241 
242   /**
243    * Outputs a textual representation of the value of an unknown field.
244    *
245    * @param tag the field's tag number
246    * @param value the value of the field
247    * @param output the output to which to append the formatted value
248    * @throws ClassCastException if the value is not appropriate for the
249    *     given field descriptor
250    * @throws IOException if there is an exception writing to the output
251    */
printUnknownFieldValue(final int tag, final Object value, final Appendable output)252   public static void printUnknownFieldValue(final int tag,
253                                             final Object value,
254                                             final Appendable output)
255                                             throws IOException {
256     printUnknownFieldValue(tag, value, new TextGenerator(output));
257   }
258 
printUnknownFieldValue(final int tag, final Object value, final TextGenerator generator)259   private static void printUnknownFieldValue(final int tag,
260                                              final Object value,
261                                              final TextGenerator generator)
262                                              throws IOException {
263     switch (WireFormat.getTagWireType(tag)) {
264       case WireFormat.WIRETYPE_VARINT:
265         generator.print(unsignedToString((Long) value));
266         break;
267       case WireFormat.WIRETYPE_FIXED32:
268         generator.print(
269             String.format((Locale) null, "0x%08x", (Integer) value));
270         break;
271       case WireFormat.WIRETYPE_FIXED64:
272         generator.print(String.format((Locale) null, "0x%016x", (Long) value));
273         break;
274       case WireFormat.WIRETYPE_LENGTH_DELIMITED:
275         generator.print("\"");
276         generator.print(escapeBytes((ByteString) value));
277         generator.print("\"");
278         break;
279       case WireFormat.WIRETYPE_START_GROUP:
280         DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
281         break;
282       default:
283         throw new IllegalArgumentException("Bad tag: " + tag);
284     }
285   }
286 
287   /** Helper class for converting protobufs to text. */
288   private static final class Printer {
289     /** Whether to omit newlines from the output. */
290     boolean singleLineMode = false;
291 
292     /** Whether to escape non ASCII characters with backslash and octal. */
293     boolean escapeNonAscii = true;
294 
Printer()295     private Printer() {}
296 
297     /** Setter of singleLineMode */
setSingleLineMode(boolean singleLineMode)298     private Printer setSingleLineMode(boolean singleLineMode) {
299       this.singleLineMode = singleLineMode;
300       return this;
301     }
302 
303     /** Setter of escapeNonAscii */
setEscapeNonAscii(boolean escapeNonAscii)304     private Printer setEscapeNonAscii(boolean escapeNonAscii) {
305       this.escapeNonAscii = escapeNonAscii;
306       return this;
307     }
308 
print( final MessageOrBuilder message, final TextGenerator generator)309     private void print(
310         final MessageOrBuilder message, final TextGenerator generator)
311         throws IOException {
312       for (Map.Entry<FieldDescriptor, Object> field
313           : message.getAllFields().entrySet()) {
314         printField(field.getKey(), field.getValue(), generator);
315       }
316       printUnknownFields(message.getUnknownFields(), generator);
317     }
318 
printField(final FieldDescriptor field, final Object value, final TextGenerator generator)319     private void printField(final FieldDescriptor field, final Object value,
320         final TextGenerator generator) throws IOException {
321       if (field.isRepeated()) {
322         // Repeated field.  Print each element.
323         for (Object element : (List<?>) value) {
324           printSingleField(field, element, generator);
325         }
326       } else {
327         printSingleField(field, value, generator);
328       }
329     }
330 
printSingleField(final FieldDescriptor field, final Object value, final TextGenerator generator)331     private void printSingleField(final FieldDescriptor field,
332                                   final Object value,
333                                   final TextGenerator generator)
334                                   throws IOException {
335       if (field.isExtension()) {
336         generator.print("[");
337         // We special-case MessageSet elements for compatibility with proto1.
338         if (field.getContainingType().getOptions().getMessageSetWireFormat()
339             && (field.getType() == FieldDescriptor.Type.MESSAGE)
340             && (field.isOptional())
341             // object equality
342             && (field.getExtensionScope() == field.getMessageType())) {
343           generator.print(field.getMessageType().getFullName());
344         } else {
345           generator.print(field.getFullName());
346         }
347         generator.print("]");
348       } else {
349         if (field.getType() == FieldDescriptor.Type.GROUP) {
350           // Groups must be serialized with their original capitalization.
351           generator.print(field.getMessageType().getName());
352         } else {
353           generator.print(field.getName());
354         }
355       }
356 
357       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
358         if (singleLineMode) {
359           generator.print(" { ");
360         } else {
361           generator.print(" {\n");
362           generator.indent();
363         }
364       } else {
365         generator.print(": ");
366       }
367 
368       printFieldValue(field, value, generator);
369 
370       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
371         if (singleLineMode) {
372           generator.print("} ");
373         } else {
374           generator.outdent();
375           generator.print("}\n");
376         }
377       } else {
378         if (singleLineMode) {
379           generator.print(" ");
380         } else {
381           generator.print("\n");
382         }
383       }
384     }
385 
printFieldValue(final FieldDescriptor field, final Object value, final TextGenerator generator)386     private void printFieldValue(final FieldDescriptor field,
387                                  final Object value,
388                                  final TextGenerator generator)
389                                  throws IOException {
390       switch (field.getType()) {
391         case INT32:
392         case SINT32:
393         case SFIXED32:
394           generator.print(((Integer) value).toString());
395           break;
396 
397         case INT64:
398         case SINT64:
399         case SFIXED64:
400           generator.print(((Long) value).toString());
401           break;
402 
403         case BOOL:
404           generator.print(((Boolean) value).toString());
405           break;
406 
407         case FLOAT:
408           generator.print(((Float) value).toString());
409           break;
410 
411         case DOUBLE:
412           generator.print(((Double) value).toString());
413           break;
414 
415         case UINT32:
416         case FIXED32:
417           generator.print(unsignedToString((Integer) value));
418           break;
419 
420         case UINT64:
421         case FIXED64:
422           generator.print(unsignedToString((Long) value));
423           break;
424 
425         case STRING:
426           generator.print("\"");
427           generator.print(escapeNonAscii
428               ? TextFormatEscaper.escapeText((String) value)
429               : escapeDoubleQuotesAndBackslashes((String) value)
430                   .replace("\n", "\\n"));
431           generator.print("\"");
432           break;
433 
434         case BYTES:
435           generator.print("\"");
436           if (value instanceof ByteString) {
437             generator.print(escapeBytes((ByteString) value));
438           } else {
439             generator.print(escapeBytes((byte[]) value));
440           }
441           generator.print("\"");
442           break;
443 
444         case ENUM:
445           generator.print(((EnumValueDescriptor) value).getName());
446           break;
447 
448         case MESSAGE:
449         case GROUP:
450           print((Message) value, generator);
451           break;
452       }
453     }
454 
printUnknownFields(final UnknownFieldSet unknownFields, final TextGenerator generator)455     private void printUnknownFields(final UnknownFieldSet unknownFields,
456                                     final TextGenerator generator)
457                                     throws IOException {
458       for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
459                unknownFields.asMap().entrySet()) {
460         final int number = entry.getKey();
461         final UnknownFieldSet.Field field = entry.getValue();
462         printUnknownField(number, WireFormat.WIRETYPE_VARINT,
463             field.getVarintList(), generator);
464         printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
465             field.getFixed32List(), generator);
466         printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
467             field.getFixed64List(), generator);
468         printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
469             field.getLengthDelimitedList(), generator);
470         for (final UnknownFieldSet value : field.getGroupList()) {
471           generator.print(entry.getKey().toString());
472           if (singleLineMode) {
473             generator.print(" { ");
474           } else {
475             generator.print(" {\n");
476             generator.indent();
477           }
478           printUnknownFields(value, generator);
479           if (singleLineMode) {
480             generator.print("} ");
481           } else {
482             generator.outdent();
483             generator.print("}\n");
484           }
485         }
486       }
487     }
488 
printUnknownField(final int number, final int wireType, final List<?> values, final TextGenerator generator)489     private void printUnknownField(final int number,
490                                    final int wireType,
491                                    final List<?> values,
492                                    final TextGenerator generator)
493                                    throws IOException {
494       for (final Object value : values) {
495         generator.print(String.valueOf(number));
496         generator.print(": ");
497         printUnknownFieldValue(wireType, value, generator);
498         generator.print(singleLineMode ? " " : "\n");
499       }
500     }
501   }
502 
503   /** Convert an unsigned 32-bit integer to a string. */
unsignedToString(final int value)504   public static String unsignedToString(final int value) {
505     if (value >= 0) {
506       return Integer.toString(value);
507     } else {
508       return Long.toString(value & 0x00000000FFFFFFFFL);
509     }
510   }
511 
512   /** Convert an unsigned 64-bit integer to a string. */
unsignedToString(final long value)513   public static String unsignedToString(final long value) {
514     if (value >= 0) {
515       return Long.toString(value);
516     } else {
517       // Pull off the most-significant bit so that BigInteger doesn't think
518       // the number is negative, then set it again using setBit().
519       return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL)
520                        .setBit(63).toString();
521     }
522   }
523 
524   /**
525    * An inner class for writing text to the output stream.
526    */
527   private static final class TextGenerator {
528     private final Appendable output;
529     private final StringBuilder indent = new StringBuilder();
530     private boolean atStartOfLine = true;
531 
TextGenerator(final Appendable output)532     private TextGenerator(final Appendable output) {
533       this.output = output;
534     }
535 
536     /**
537      * Indent text by two spaces.  After calling Indent(), two spaces will be
538      * inserted at the beginning of each line of text.  Indent() may be called
539      * multiple times to produce deeper indents.
540      */
indent()541     public void indent() {
542       indent.append("  ");
543     }
544 
545     /**
546      * Reduces the current indent level by two spaces, or crashes if the indent
547      * level is zero.
548      */
outdent()549     public void outdent() {
550       final int length = indent.length();
551       if (length == 0) {
552         throw new IllegalArgumentException(
553             " Outdent() without matching Indent().");
554       }
555       indent.delete(length - 2, length);
556     }
557 
558     /**
559      * Print text to the output stream.
560      */
print(final CharSequence text)561     public void print(final CharSequence text) throws IOException {
562       final int size = text.length();
563       int pos = 0;
564 
565       for (int i = 0; i < size; i++) {
566         if (text.charAt(i) == '\n') {
567           write(text.subSequence(pos, i + 1));
568           pos = i + 1;
569           atStartOfLine = true;
570         }
571       }
572       write(text.subSequence(pos, size));
573     }
574 
write(final CharSequence data)575     private void write(final CharSequence data) throws IOException {
576       if (data.length() == 0) {
577         return;
578       }
579       if (atStartOfLine) {
580         atStartOfLine = false;
581         output.append(indent);
582       }
583       output.append(data);
584     }
585   }
586 
587   // =================================================================
588   // Parsing
589 
590   /**
591    * Represents a stream of tokens parsed from a {@code String}.
592    *
593    * <p>The Java standard library provides many classes that you might think
594    * would be useful for implementing this, but aren't.  For example:
595    *
596    * <ul>
597    * <li>{@code java.io.StreamTokenizer}:  This almost does what we want -- or,
598    *   at least, something that would get us close to what we want -- except
599    *   for one fatal flaw:  It automatically un-escapes strings using Java
600    *   escape sequences, which do not include all the escape sequences we
601    *   need to support (e.g. '\x').
602    * <li>{@code java.util.Scanner}:  This seems like a great way at least to
603    *   parse regular expressions out of a stream (so we wouldn't have to load
604    *   the entire input into a single string before parsing).  Sadly,
605    *   {@code Scanner} requires that tokens be delimited with some delimiter.
606    *   Thus, although the text "foo:" should parse to two tokens ("foo" and
607    *   ":"), {@code Scanner} would recognize it only as a single token.
608    *   Furthermore, {@code Scanner} provides no way to inspect the contents
609    *   of delimiters, making it impossible to keep track of line and column
610    *   numbers.
611    * </ul>
612    *
613    * <p>Luckily, Java's regular expression support does manage to be useful to
614    * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
615    * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
616    * that we need to have the entire input in one contiguous string.
617    */
618   private static final class Tokenizer {
619     private final CharSequence text;
620     private final Matcher matcher;
621     private String currentToken;
622 
623     // The character index within this.text at which the current token begins.
624     private int pos = 0;
625 
626     // The line and column numbers of the current token.
627     private int line = 0;
628     private int column = 0;
629 
630     // The line and column numbers of the previous token (allows throwing
631     // errors *after* consuming).
632     private int previousLine = 0;
633     private int previousColumn = 0;
634 
635     // We use possessive quantifiers (*+ and ++) because otherwise the Java
636     // regex matcher has stack overflows on large inputs.
637     private static final Pattern WHITESPACE =
638       Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
639     private static final Pattern TOKEN = Pattern.compile(
640       "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
641       "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
642       "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
643       "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
644       Pattern.MULTILINE);
645 
646     private static final Pattern DOUBLE_INFINITY = Pattern.compile(
647       "-?inf(inity)?",
648       Pattern.CASE_INSENSITIVE);
649     private static final Pattern FLOAT_INFINITY = Pattern.compile(
650       "-?inf(inity)?f?",
651       Pattern.CASE_INSENSITIVE);
652     private static final Pattern FLOAT_NAN = Pattern.compile(
653       "nanf?",
654       Pattern.CASE_INSENSITIVE);
655 
656     /** Construct a tokenizer that parses tokens from the given text. */
Tokenizer(final CharSequence text)657     private Tokenizer(final CharSequence text) {
658       this.text = text;
659       this.matcher = WHITESPACE.matcher(text);
660       skipWhitespace();
661       nextToken();
662     }
663 
getLine()664     int getLine() {
665       return line;
666     }
667 
getColumn()668     int getColumn() {
669       return column;
670     }
671 
672     /** Are we at the end of the input? */
atEnd()673     public boolean atEnd() {
674       return currentToken.length() == 0;
675     }
676 
677     /** Advance to the next token. */
nextToken()678     public void nextToken() {
679       previousLine = line;
680       previousColumn = column;
681 
682       // Advance the line counter to the current position.
683       while (pos < matcher.regionStart()) {
684         if (text.charAt(pos) == '\n') {
685           ++line;
686           column = 0;
687         } else {
688           ++column;
689         }
690         ++pos;
691       }
692 
693       // Match the next token.
694       if (matcher.regionStart() == matcher.regionEnd()) {
695         // EOF
696         currentToken = "";
697       } else {
698         matcher.usePattern(TOKEN);
699         if (matcher.lookingAt()) {
700           currentToken = matcher.group();
701           matcher.region(matcher.end(), matcher.regionEnd());
702         } else {
703           // Take one character.
704           currentToken = String.valueOf(text.charAt(pos));
705           matcher.region(pos + 1, matcher.regionEnd());
706         }
707 
708         skipWhitespace();
709       }
710     }
711 
712     /**
713      * Skip over any whitespace so that the matcher region starts at the next
714      * token.
715      */
skipWhitespace()716     private void skipWhitespace() {
717       matcher.usePattern(WHITESPACE);
718       if (matcher.lookingAt()) {
719         matcher.region(matcher.end(), matcher.regionEnd());
720       }
721     }
722 
723     /**
724      * If the next token exactly matches {@code token}, consume it and return
725      * {@code true}.  Otherwise, return {@code false} without doing anything.
726      */
tryConsume(final String token)727     public boolean tryConsume(final String token) {
728       if (currentToken.equals(token)) {
729         nextToken();
730         return true;
731       } else {
732         return false;
733       }
734     }
735 
736     /**
737      * If the next token exactly matches {@code token}, consume it.  Otherwise,
738      * throw a {@link ParseException}.
739      */
consume(final String token)740     public void consume(final String token) throws ParseException {
741       if (!tryConsume(token)) {
742         throw parseException("Expected \"" + token + "\".");
743       }
744     }
745 
746     /**
747      * Returns {@code true} if the next token is an integer, but does
748      * not consume it.
749      */
lookingAtInteger()750     public boolean lookingAtInteger() {
751       if (currentToken.length() == 0) {
752         return false;
753       }
754 
755       final char c = currentToken.charAt(0);
756       return ('0' <= c && c <= '9')
757           || c == '-' || c == '+';
758     }
759 
760     /**
761      * Returns {@code true} if the current token's text is equal to that
762      * specified.
763      */
lookingAt(String text)764     public boolean lookingAt(String text) {
765       return currentToken.equals(text);
766     }
767 
768     /**
769      * If the next token is an identifier, consume it and return its value.
770      * Otherwise, throw a {@link ParseException}.
771      */
consumeIdentifier()772     public String consumeIdentifier() throws ParseException {
773       for (int i = 0; i < currentToken.length(); i++) {
774         final char c = currentToken.charAt(i);
775         if (('a' <= c && c <= 'z')
776             || ('A' <= c && c <= 'Z')
777             || ('0' <= c && c <= '9')
778             || (c == '_') || (c == '.')) {
779           // OK
780         } else {
781           throw parseException(
782               "Expected identifier. Found '" + currentToken + "'");
783         }
784       }
785 
786       final String result = currentToken;
787       nextToken();
788       return result;
789     }
790 
791     /**
792      * If the next token is an identifier, consume it and return {@code true}.
793      * Otherwise, return {@code false} without doing anything.
794      */
tryConsumeIdentifier()795     public boolean tryConsumeIdentifier() {
796       try {
797         consumeIdentifier();
798         return true;
799       } catch (ParseException e) {
800         return false;
801       }
802     }
803 
804     /**
805      * If the next token is a 32-bit signed integer, consume it and return its
806      * value.  Otherwise, throw a {@link ParseException}.
807      */
consumeInt32()808     public int consumeInt32() throws ParseException {
809       try {
810         final int result = parseInt32(currentToken);
811         nextToken();
812         return result;
813       } catch (NumberFormatException e) {
814         throw integerParseException(e);
815       }
816     }
817 
818     /**
819      * If the next token is a 32-bit unsigned integer, consume it and return its
820      * value.  Otherwise, throw a {@link ParseException}.
821      */
consumeUInt32()822     public int consumeUInt32() throws ParseException {
823       try {
824         final int result = parseUInt32(currentToken);
825         nextToken();
826         return result;
827       } catch (NumberFormatException e) {
828         throw integerParseException(e);
829       }
830     }
831 
832     /**
833      * If the next token is a 64-bit signed integer, consume it and return its
834      * value.  Otherwise, throw a {@link ParseException}.
835      */
consumeInt64()836     public long consumeInt64() throws ParseException {
837       try {
838         final long result = parseInt64(currentToken);
839         nextToken();
840         return result;
841       } catch (NumberFormatException e) {
842         throw integerParseException(e);
843       }
844     }
845 
846     /**
847      * If the next token is a 64-bit signed integer, consume it and return
848      * {@code true}.  Otherwise, return {@code false} without doing anything.
849      */
tryConsumeInt64()850     public boolean tryConsumeInt64() {
851       try {
852         consumeInt64();
853         return true;
854       } catch (ParseException e) {
855         return false;
856       }
857     }
858 
859     /**
860      * If the next token is a 64-bit unsigned integer, consume it and return its
861      * value.  Otherwise, throw a {@link ParseException}.
862      */
consumeUInt64()863     public long consumeUInt64() throws ParseException {
864       try {
865         final long result = parseUInt64(currentToken);
866         nextToken();
867         return result;
868       } catch (NumberFormatException e) {
869         throw integerParseException(e);
870       }
871     }
872 
873     /**
874      * If the next token is a 64-bit unsigned integer, consume it and return
875      * {@code true}.  Otherwise, return {@code false} without doing anything.
876      */
tryConsumeUInt64()877     public boolean tryConsumeUInt64() {
878       try {
879         consumeUInt64();
880         return true;
881       } catch (ParseException e) {
882         return false;
883       }
884     }
885 
886     /**
887      * If the next token is a double, consume it and return its value.
888      * Otherwise, throw a {@link ParseException}.
889      */
consumeDouble()890     public double consumeDouble() throws ParseException {
891       // We need to parse infinity and nan separately because
892       // Double.parseDouble() does not accept "inf", "infinity", or "nan".
893       if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
894         final boolean negative = currentToken.startsWith("-");
895         nextToken();
896         return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
897       }
898       if (currentToken.equalsIgnoreCase("nan")) {
899         nextToken();
900         return Double.NaN;
901       }
902       try {
903         final double result = Double.parseDouble(currentToken);
904         nextToken();
905         return result;
906       } catch (NumberFormatException e) {
907         throw floatParseException(e);
908       }
909     }
910 
911     /**
912      * If the next token is a double, consume it and return {@code true}.
913      * Otherwise, return {@code false} without doing anything.
914      */
tryConsumeDouble()915     public boolean tryConsumeDouble() {
916       try {
917         consumeDouble();
918         return true;
919       } catch (ParseException e) {
920         return false;
921       }
922     }
923 
924     /**
925      * If the next token is a float, consume it and return its value.
926      * Otherwise, throw a {@link ParseException}.
927      */
consumeFloat()928     public float consumeFloat() throws ParseException {
929       // We need to parse infinity and nan separately because
930       // Float.parseFloat() does not accept "inf", "infinity", or "nan".
931       if (FLOAT_INFINITY.matcher(currentToken).matches()) {
932         final boolean negative = currentToken.startsWith("-");
933         nextToken();
934         return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
935       }
936       if (FLOAT_NAN.matcher(currentToken).matches()) {
937         nextToken();
938         return Float.NaN;
939       }
940       try {
941         final float result = Float.parseFloat(currentToken);
942         nextToken();
943         return result;
944       } catch (NumberFormatException e) {
945         throw floatParseException(e);
946       }
947     }
948 
949     /**
950      * If the next token is a float, consume it and return {@code true}.
951      * Otherwise, return {@code false} without doing anything.
952      */
tryConsumeFloat()953     public boolean tryConsumeFloat() {
954       try {
955         consumeFloat();
956         return true;
957       } catch (ParseException e) {
958         return false;
959       }
960     }
961 
962     /**
963      * If the next token is a boolean, consume it and return its value.
964      * Otherwise, throw a {@link ParseException}.
965      */
consumeBoolean()966     public boolean consumeBoolean() throws ParseException {
967       if (currentToken.equals("true")
968           || currentToken.equals("True")
969           || currentToken.equals("t")
970           || currentToken.equals("1")) {
971         nextToken();
972         return true;
973       } else if (currentToken.equals("false")
974           || currentToken.equals("False")
975           || currentToken.equals("f")
976           || currentToken.equals("0")) {
977         nextToken();
978         return false;
979       } else {
980         throw parseException("Expected \"true\" or \"false\".");
981       }
982     }
983 
984     /**
985      * If the next token is a string, consume it and return its (unescaped)
986      * value.  Otherwise, throw a {@link ParseException}.
987      */
consumeString()988     public String consumeString() throws ParseException {
989       return consumeByteString().toStringUtf8();
990     }
991 
992     /**
993      * If the next token is a string, consume it and return true.  Otherwise,
994      * return false.
995      */
tryConsumeString()996     public boolean tryConsumeString() {
997       try {
998         consumeString();
999         return true;
1000       } catch (ParseException e) {
1001         return false;
1002       }
1003     }
1004 
1005     /**
1006      * If the next token is a string, consume it, unescape it as a
1007      * {@link ByteString}, and return it.  Otherwise, throw a
1008      * {@link ParseException}.
1009      */
consumeByteString()1010     public ByteString consumeByteString() throws ParseException {
1011       List<ByteString> list = new ArrayList<ByteString>();
1012       consumeByteString(list);
1013       while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
1014         consumeByteString(list);
1015       }
1016       return ByteString.copyFrom(list);
1017     }
1018 
1019     /**
1020      * Like {@link #consumeByteString()} but adds each token of the string to
1021      * the given list.  String literals (whether bytes or text) may come in
1022      * multiple adjacent tokens which are automatically concatenated, like in
1023      * C or Python.
1024      */
consumeByteString(List<ByteString> list)1025     private void consumeByteString(List<ByteString> list)
1026         throws ParseException {
1027       final char quote = currentToken.length() > 0
1028           ? currentToken.charAt(0)
1029           : '\0';
1030       if (quote != '\"' && quote != '\'') {
1031         throw parseException("Expected string.");
1032       }
1033 
1034       if (currentToken.length() < 2
1035           || currentToken.charAt(currentToken.length() - 1) != quote) {
1036         throw parseException("String missing ending quote.");
1037       }
1038 
1039       try {
1040         final String escaped =
1041             currentToken.substring(1, currentToken.length() - 1);
1042         final ByteString result = unescapeBytes(escaped);
1043         nextToken();
1044         list.add(result);
1045       } catch (InvalidEscapeSequenceException e) {
1046         throw parseException(e.getMessage());
1047       }
1048     }
1049 
1050     /**
1051      * Returns a {@link ParseException} with the current line and column
1052      * numbers in the description, suitable for throwing.
1053      */
parseException(final String description)1054     public ParseException parseException(final String description) {
1055       // Note:  People generally prefer one-based line and column numbers.
1056       return new ParseException(
1057         line + 1, column + 1, description);
1058     }
1059 
1060     /**
1061      * Returns a {@link ParseException} with the line and column numbers of
1062      * the previous token in the description, suitable for throwing.
1063      */
parseExceptionPreviousToken( final String description)1064     public ParseException parseExceptionPreviousToken(
1065         final String description) {
1066       // Note:  People generally prefer one-based line and column numbers.
1067       return new ParseException(
1068         previousLine + 1, previousColumn + 1, description);
1069     }
1070 
1071     /**
1072      * Constructs an appropriate {@link ParseException} for the given
1073      * {@code NumberFormatException} when trying to parse an integer.
1074      */
integerParseException( final NumberFormatException e)1075     private ParseException integerParseException(
1076         final NumberFormatException e) {
1077       return parseException("Couldn't parse integer: " + e.getMessage());
1078     }
1079 
1080     /**
1081      * Constructs an appropriate {@link ParseException} for the given
1082      * {@code NumberFormatException} when trying to parse a float or double.
1083      */
floatParseException(final NumberFormatException e)1084     private ParseException floatParseException(final NumberFormatException e) {
1085       return parseException("Couldn't parse number: " + e.getMessage());
1086     }
1087 
1088     /**
1089      * Returns a {@link UnknownFieldParseException} with the line and column
1090      * numbers of the previous token in the description, and the unknown field
1091      * name, suitable for throwing.
1092      */
unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1093     public UnknownFieldParseException unknownFieldParseExceptionPreviousToken(
1094         final String unknownField, final String description) {
1095       // Note:  People generally prefer one-based line and column numbers.
1096       return new UnknownFieldParseException(
1097         previousLine + 1, previousColumn + 1, unknownField, description);
1098     }
1099   }
1100 
1101   /** Thrown when parsing an invalid text format message. */
1102   public static class ParseException extends IOException {
1103     private static final long serialVersionUID = 3196188060225107702L;
1104 
1105     private final int line;
1106     private final int column;
1107 
1108     /** Create a new instance, with -1 as the line and column numbers. */
ParseException(final String message)1109     public ParseException(final String message) {
1110       this(-1, -1, message);
1111     }
1112 
1113     /**
1114      * Create a new instance
1115      *
1116      * @param line the line number where the parse error occurred,
1117      * using 1-offset.
1118      * @param column the column number where the parser error occurred,
1119      * using 1-offset.
1120      */
ParseException(final int line, final int column, final String message)1121     public ParseException(final int line, final int column,
1122         final String message) {
1123       super(Integer.toString(line) + ":" + column + ": " + message);
1124       this.line = line;
1125       this.column = column;
1126     }
1127 
1128     /**
1129      * Return the line where the parse exception occurred, or -1 when
1130      * none is provided. The value is specified as 1-offset, so the first
1131      * line is line 1.
1132      */
getLine()1133     public int getLine() {
1134       return line;
1135     }
1136 
1137     /**
1138      * Return the column where the parse exception occurred, or -1 when
1139      * none is provided. The value is specified as 1-offset, so the first
1140      * line is line 1.
1141      */
getColumn()1142     public int getColumn() {
1143       return column;
1144     }
1145   }
1146 
1147   /**
1148    * Thrown when encountering an unknown field while parsing
1149    * a text format message.
1150    */
1151   public static class UnknownFieldParseException extends ParseException {
1152     private final String unknownField;
1153 
1154     /**
1155      * Create a new instance, with -1 as the line and column numbers, and an
1156      * empty unknown field name.
1157      */
UnknownFieldParseException(final String message)1158     public UnknownFieldParseException(final String message) {
1159       this(-1, -1, "", message);
1160     }
1161 
1162     /**
1163      * Create a new instance
1164      *
1165      * @param line the line number where the parse error occurred,
1166      * using 1-offset.
1167      * @param column the column number where the parser error occurred,
1168      * using 1-offset.
1169      * @param unknownField the name of the unknown field found while parsing.
1170      */
UnknownFieldParseException(final int line, final int column, final String unknownField, final String message)1171     public UnknownFieldParseException(final int line, final int column,
1172         final String unknownField, final String message) {
1173       super(line, column, message);
1174       this.unknownField = unknownField;
1175     }
1176 
1177     /**
1178      * Return the name of the unknown field encountered while parsing the
1179      * protocol buffer string.
1180      */
getUnknownField()1181     public String getUnknownField() {
1182       return unknownField;
1183     }
1184   }
1185 
1186   private static final Parser PARSER = Parser.newBuilder().build();
1187 
1188   /**
1189    * Return a {@link Parser} instance which can parse text-format
1190    * messages. The returned instance is thread-safe.
1191    */
getParser()1192   public static Parser getParser() {
1193     return PARSER;
1194   }
1195 
1196   /**
1197    * Parse a text-format message from {@code input} and merge the contents
1198    * into {@code builder}.
1199    */
merge(final Readable input, final Message.Builder builder)1200   public static void merge(final Readable input,
1201                            final Message.Builder builder)
1202                            throws IOException {
1203     PARSER.merge(input, builder);
1204   }
1205 
1206   /**
1207    * Parse a text-format message from {@code input} and merge the contents
1208    * into {@code builder}.
1209    */
merge(final CharSequence input, final Message.Builder builder)1210   public static void merge(final CharSequence input,
1211                            final Message.Builder builder)
1212                            throws ParseException {
1213     PARSER.merge(input, builder);
1214   }
1215 
1216   /**
1217    * Parse a text-format message from {@code input} and merge the contents
1218    * into {@code builder}.  Extensions will be recognized if they are
1219    * registered in {@code extensionRegistry}.
1220    */
merge(final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1221   public static void merge(final Readable input,
1222                            final ExtensionRegistry extensionRegistry,
1223                            final Message.Builder builder)
1224                            throws IOException {
1225     PARSER.merge(input, extensionRegistry, builder);
1226   }
1227 
1228 
1229   /**
1230    * Parse a text-format message from {@code input} and merge the contents
1231    * into {@code builder}.  Extensions will be recognized if they are
1232    * registered in {@code extensionRegistry}.
1233    */
merge(final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1234   public static void merge(final CharSequence input,
1235                            final ExtensionRegistry extensionRegistry,
1236                            final Message.Builder builder)
1237                            throws ParseException {
1238     PARSER.merge(input, extensionRegistry, builder);
1239   }
1240 
1241 
1242   /**
1243    * Parser for text-format proto2 instances. This class is thread-safe.
1244    * The implementation largely follows google/protobuf/text_format.cc.
1245    *
1246    * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or
1247    * {@link Builder} to control the parser behavior.
1248    */
1249   public static class Parser {
1250     /**
1251      * Determines if repeated values for non-repeated fields and
1252      * oneofs are permitted. For example, given required/optional field "foo"
1253      * and a oneof containing "baz" and "qux":
1254      * <ul>
1255      * <li>"foo: 1 foo: 2"
1256      * <li>"baz: 1 qux: 2"
1257      * <li>merging "foo: 2" into a proto in which foo is already set, or
1258      * <li>merging "qux: 2" into a proto in which baz is already set.
1259      * </ul>
1260      */
1261     public enum SingularOverwritePolicy {
1262       /** The last value is retained. */
1263       ALLOW_SINGULAR_OVERWRITES,
1264       /** An error is issued. */
1265       FORBID_SINGULAR_OVERWRITES
1266     }
1267 
1268     private final boolean allowUnknownFields;
1269     private final SingularOverwritePolicy singularOverwritePolicy;
1270     private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
1271 
Parser( boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy, TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1272     private Parser(
1273         boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy,
1274         TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
1275       this.allowUnknownFields = allowUnknownFields;
1276       this.singularOverwritePolicy = singularOverwritePolicy;
1277       this.parseInfoTreeBuilder = parseInfoTreeBuilder;
1278     }
1279 
1280     /**
1281      * Returns a new instance of {@link Builder}.
1282      */
newBuilder()1283     public static Builder newBuilder() {
1284       return new Builder();
1285     }
1286 
1287     /**
1288      * Builder that can be used to obtain new instances of {@link Parser}.
1289      */
1290     public static class Builder {
1291       private boolean allowUnknownFields = false;
1292       private SingularOverwritePolicy singularOverwritePolicy =
1293           SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
1294       private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
1295 
1296 
1297       /**
1298        * Sets parser behavior when a non-repeated field appears more than once.
1299        */
setSingularOverwritePolicy(SingularOverwritePolicy p)1300       public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
1301         this.singularOverwritePolicy = p;
1302         return this;
1303       }
1304 
setParseInfoTreeBuilder( TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1305       public Builder setParseInfoTreeBuilder(
1306           TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
1307         this.parseInfoTreeBuilder = parseInfoTreeBuilder;
1308         return this;
1309       }
1310 
build()1311       public Parser build() {
1312         return new Parser(
1313             allowUnknownFields, singularOverwritePolicy, parseInfoTreeBuilder);
1314       }
1315     }
1316 
1317     /**
1318      * Parse a text-format message from {@code input} and merge the contents
1319      * into {@code builder}.
1320      */
merge(final Readable input, final Message.Builder builder)1321     public void merge(final Readable input,
1322                       final Message.Builder builder)
1323                       throws IOException {
1324       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1325     }
1326 
1327     /**
1328      * Parse a text-format message from {@code input} and merge the contents
1329      * into {@code builder}.
1330      */
merge(final CharSequence input, final Message.Builder builder)1331     public void merge(final CharSequence input,
1332                       final Message.Builder builder)
1333                       throws ParseException {
1334       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1335     }
1336 
1337     /**
1338      * Parse a text-format message from {@code input} and merge the contents
1339      * into {@code builder}.  Extensions will be recognized if they are
1340      * registered in {@code extensionRegistry}.
1341      */
merge(final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1342     public void merge(final Readable input,
1343                       final ExtensionRegistry extensionRegistry,
1344                       final Message.Builder builder)
1345                       throws IOException {
1346       // Read the entire input to a String then parse that.
1347 
1348       // If StreamTokenizer were not quite so crippled, or if there were a kind
1349       // of Reader that could read in chunks that match some particular regex,
1350       // or if we wanted to write a custom Reader to tokenize our stream, then
1351       // we would not have to read to one big String.  Alas, none of these is
1352       // the case.  Oh well.
1353 
1354       merge(toStringBuilder(input), extensionRegistry, builder);
1355     }
1356 
1357 
1358     private static final int BUFFER_SIZE = 4096;
1359 
1360     // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
1361     // overhead is worthwhile
toStringBuilder(final Readable input)1362     private static StringBuilder toStringBuilder(final Readable input)
1363         throws IOException {
1364       final StringBuilder text = new StringBuilder();
1365       final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
1366       while (true) {
1367         final int n = input.read(buffer);
1368         if (n == -1) {
1369           break;
1370         }
1371         buffer.flip();
1372         text.append(buffer, 0, n);
1373       }
1374       return text;
1375     }
1376 
1377     /**
1378      * Parse a text-format message from {@code input} and merge the contents
1379      * into {@code builder}.  Extensions will be recognized if they are
1380      * registered in {@code extensionRegistry}.
1381      */
merge(final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1382     public void merge(final CharSequence input,
1383                       final ExtensionRegistry extensionRegistry,
1384                       final Message.Builder builder)
1385                       throws ParseException {
1386       final Tokenizer tokenizer = new Tokenizer(input);
1387       MessageReflection.BuilderAdapter target =
1388           new MessageReflection.BuilderAdapter(builder);
1389 
1390       while (!tokenizer.atEnd()) {
1391         mergeField(tokenizer, extensionRegistry, target);
1392       }
1393     }
1394 
1395 
1396     /**
1397      * Parse a single field from {@code tokenizer} and merge it into
1398      * {@code builder}.
1399      */
mergeField(final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target)1400     private void mergeField(final Tokenizer tokenizer,
1401                             final ExtensionRegistry extensionRegistry,
1402                             final MessageReflection.MergeTarget target)
1403                             throws ParseException {
1404       mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder);
1405     }
1406 
    /**
     * Parse a single field from {@code tokenizer} and merge it into
     * {@code target}.
     *
     * <p>A field is either a plain field name or an extension name in square
     * brackets, followed by its value(s) and an optional trailing
     * {@code ';'} or {@code ','}.  A name that cannot be resolved raises an
     * exception unless {@code allowUnknownFields} is set, in which case a
     * warning is logged and the field's value is skipped.
     *
     * @param tokenizer positioned at the start of the field
     * @param extensionRegistry registry used to look up extension names
     * @param target receives the parsed value(s)
     * @param parseTreeBuilder if non-null, receives the position at which
     *     the field started; message-typed fields are parsed with the child
     *     builder it returns for that field
     * @throws ParseException if the input is malformed, or the field is
     *     unknown and unknown fields are not allowed
     */
    private void mergeField(final Tokenizer tokenizer,
                            final ExtensionRegistry extensionRegistry,
                            final MessageReflection.MergeTarget target,
                            TextFormatParseInfoTree.Builder parseTreeBuilder)
                            throws ParseException {
      FieldDescriptor field = null;
      // Capture the position before the field name is consumed; it is
      // reported to parseTreeBuilder once the field has been parsed.
      int startLine = tokenizer.getLine();
      int startColumn = tokenizer.getColumn();
      final Descriptor type = target.getDescriptorForType();
      ExtensionRegistry.ExtensionInfo extension = null;

      if (tokenizer.tryConsume("[")) {
        // An extension.  Its name is one or more identifiers joined by ".".
        final StringBuilder name =
            new StringBuilder(tokenizer.consumeIdentifier());
        while (tokenizer.tryConsume(".")) {
          name.append('.');
          name.append(tokenizer.consumeIdentifier());
        }

        extension = target.findExtensionByName(
            extensionRegistry, name.toString());

        if (extension == null) {
          if (!allowUnknownFields) {
            throw tokenizer.parseExceptionPreviousToken(
              "Extension \"" + name + "\" not found in the ExtensionRegistry.");
          } else {
            // Leave field null so the value is skipped below.
            logger.warning(
              "Extension \"" + name + "\" not found in the ExtensionRegistry.");
          }
        } else {
          // A registered extension of a different message type is always an
          // error, regardless of allowUnknownFields.
          if (extension.descriptor.getContainingType() != type) {
            throw tokenizer.parseExceptionPreviousToken(
              "Extension \"" + name + "\" does not extend message type \""
              + type.getFullName() + "\".");
          }
          field = extension.descriptor;
        }

        tokenizer.consume("]");
      } else {
        final String name = tokenizer.consumeIdentifier();
        field = type.findFieldByName(name);

        // Group names are expected to be capitalized as they appear in the
        // .proto file, which actually matches their type names, not their field
        // names.
        if (field == null) {
          // Explicitly specify US locale so that this code does not break when
          // executing in Turkey.
          final String lowerName = name.toLowerCase(Locale.US);
          field = type.findFieldByName(lowerName);
          // If the case-insensitive match worked but the field is NOT a group,
          // it is not a match: only groups may be found by lowercased name.
          if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
            field = null;
          }
        }
        // Again, special-case group names as described above: a group only
        // matches when the name equals its message type's name.
        if (field != null && field.getType() == FieldDescriptor.Type.GROUP
            && !field.getMessageType().getName().equals(name)) {
          field = null;
        }

        if (field == null) {
          if (!allowUnknownFields) {
            throw tokenizer.unknownFieldParseExceptionPreviousToken(
              name,
              "Message type \"" + type.getFullName()
              + "\" has no field named \"" + name + "\".");
          } else {
            // Leave field null so the value is skipped below.
            logger.warning(
              "Message type \"" + type.getFullName()
              + "\" has no field named \"" + name + "\".");
          }
        }
      }

      // Skips unknown fields.  field can only still be null here when
      // allowUnknownFields is set; otherwise an exception was thrown above.
      if (field == null) {
        // Try to guess the type of this field.
        // If this field is not a message, there should be a ":" between the
        // field name and the field value and also the field value should not
        // start with "{" or "<" which indicates the beginning of a message body.
        // If there is no ":" or there is a "{" or "<" after ":", this field has
        // to be a message or the input is ill-formed.
        if (tokenizer.tryConsume(":")
            && !tokenizer.lookingAt("{")
            && !tokenizer.lookingAt("<")) {
          skipFieldValue(tokenizer);
        } else {
          skipFieldMessage(tokenizer);
        }
        // Nothing is merged and no location is recorded for a skipped field.
        return;
      }

      // Handle potential ':'.
      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
        tokenizer.tryConsume(":");  // optional
        if (parseTreeBuilder != null) {
          // Fields inside the sub-message are recorded in a child builder.
          TextFormatParseInfoTree.Builder childParseTreeBuilder =
              parseTreeBuilder.getBuilderForSubMessageField(field);
          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
              childParseTreeBuilder);
        } else {
          // parseTreeBuilder is null on this path; it is passed through as is.
          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
              parseTreeBuilder);
        }
      } else {
        tokenizer.consume(":");  // required
        consumeFieldValues(
            tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder);
      }

      // Record where this field started, now that its value parsed cleanly.
      if (parseTreeBuilder != null) {
        parseTreeBuilder.setLocation(
            field, TextFormatParseLocation.create(startLine, startColumn));
      }

      // For historical reasons, fields may optionally be separated by commas or
      // semicolons.  At most one separator is consumed.
      if (!tokenizer.tryConsume(";")) {
        tokenizer.tryConsume(",");
      }
    }
1536 
1537     /**
1538      * Parse a one or more field values from {@code tokenizer} and merge it into
1539      * {@code builder}.
1540      */
consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder)1541     private void consumeFieldValues(
1542         final Tokenizer tokenizer,
1543         final ExtensionRegistry extensionRegistry,
1544         final MessageReflection.MergeTarget target,
1545         final FieldDescriptor field,
1546         final ExtensionRegistry.ExtensionInfo extension,
1547         final TextFormatParseInfoTree.Builder parseTreeBuilder)
1548         throws ParseException {
1549       // Support specifying repeated field values as a comma-separated list.
1550       // Ex."foo: [1, 2, 3]"
1551       if (field.isRepeated() && tokenizer.tryConsume("[")) {
1552         while (true) {
1553           consumeFieldValue(tokenizer, extensionRegistry, target, field, extension,
1554               parseTreeBuilder);
1555           if (tokenizer.tryConsume("]")) {
1556             // End of list.
1557             break;
1558           }
1559           tokenizer.consume(",");
1560         }
1561       } else {
1562         consumeFieldValue(
1563             tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder);
1564       }
1565     }
1566 
1567     /**
1568      * Parse a single field value from {@code tokenizer} and merge it into
1569      * {@code builder}.
1570      */
consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder)1571     private void consumeFieldValue(
1572         final Tokenizer tokenizer,
1573         final ExtensionRegistry extensionRegistry,
1574         final MessageReflection.MergeTarget target,
1575         final FieldDescriptor field,
1576         final ExtensionRegistry.ExtensionInfo extension,
1577         final TextFormatParseInfoTree.Builder parseTreeBuilder)
1578         throws ParseException {
1579       Object value = null;
1580 
1581       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1582         final String endToken;
1583         if (tokenizer.tryConsume("<")) {
1584           endToken = ">";
1585         } else {
1586           tokenizer.consume("{");
1587           endToken = "}";
1588         }
1589 
1590         final MessageReflection.MergeTarget subField;
1591         subField = target.newMergeTargetForField(field,
1592             (extension == null) ? null : extension.defaultInstance);
1593 
1594         while (!tokenizer.tryConsume(endToken)) {
1595           if (tokenizer.atEnd()) {
1596             throw tokenizer.parseException(
1597               "Expected \"" + endToken + "\".");
1598           }
1599           mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder);
1600         }
1601 
1602         value = subField.finish();
1603 
1604       } else {
1605         switch (field.getType()) {
1606           case INT32:
1607           case SINT32:
1608           case SFIXED32:
1609             value = tokenizer.consumeInt32();
1610             break;
1611 
1612           case INT64:
1613           case SINT64:
1614           case SFIXED64:
1615             value = tokenizer.consumeInt64();
1616             break;
1617 
1618           case UINT32:
1619           case FIXED32:
1620             value = tokenizer.consumeUInt32();
1621             break;
1622 
1623           case UINT64:
1624           case FIXED64:
1625             value = tokenizer.consumeUInt64();
1626             break;
1627 
1628           case FLOAT:
1629             value = tokenizer.consumeFloat();
1630             break;
1631 
1632           case DOUBLE:
1633             value = tokenizer.consumeDouble();
1634             break;
1635 
1636           case BOOL:
1637             value = tokenizer.consumeBoolean();
1638             break;
1639 
1640           case STRING:
1641             value = tokenizer.consumeString();
1642             break;
1643 
1644           case BYTES:
1645             value = tokenizer.consumeByteString();
1646             break;
1647 
1648           case ENUM:
1649             final EnumDescriptor enumType = field.getEnumType();
1650 
1651             if (tokenizer.lookingAtInteger()) {
1652               final int number = tokenizer.consumeInt32();
1653               value = enumType.findValueByNumber(number);
1654               if (value == null) {
1655                 throw tokenizer.parseExceptionPreviousToken(
1656                   "Enum type \"" + enumType.getFullName()
1657                   + "\" has no value with number " + number + '.');
1658               }
1659             } else {
1660               final String id = tokenizer.consumeIdentifier();
1661               value = enumType.findValueByName(id);
1662               if (value == null) {
1663                 throw tokenizer.parseExceptionPreviousToken(
1664                   "Enum type \"" + enumType.getFullName()
1665                   + "\" has no value named \"" + id + "\".");
1666               }
1667             }
1668 
1669             break;
1670 
1671           case MESSAGE:
1672           case GROUP:
1673             throw new RuntimeException("Can't get here.");
1674         }
1675       }
1676 
1677       if (field.isRepeated()) {
1678         target.addRepeatedField(field, value);
1679       } else if ((singularOverwritePolicy
1680               == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
1681           && target.hasField(field)) {
1682         throw tokenizer.parseExceptionPreviousToken("Non-repeated field \""
1683             + field.getFullName() + "\" cannot be overwritten.");
1684       } else if ((singularOverwritePolicy
1685               == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
1686           && field.getContainingOneof() != null
1687           && target.hasOneof(field.getContainingOneof())) {
1688         Descriptors.OneofDescriptor oneof = field.getContainingOneof();
1689         throw tokenizer.parseExceptionPreviousToken("Field \""
1690             + field.getFullName() + "\" is specified along with field \""
1691             + target.getOneofFieldDescriptor(oneof).getFullName()
1692             + "\", another member of oneof \"" + oneof.getName() + "\".");
1693       } else {
1694         target.setField(field, value);
1695       }
1696     }
1697 
1698     /**
1699      * Skips the next field including the field's name and value.
1700      */
skipField(Tokenizer tokenizer)1701     private void skipField(Tokenizer tokenizer) throws ParseException {
1702       if (tokenizer.tryConsume("[")) {
1703         // Extension name.
1704         do {
1705           tokenizer.consumeIdentifier();
1706         } while (tokenizer.tryConsume("."));
1707         tokenizer.consume("]");
1708       } else {
1709         tokenizer.consumeIdentifier();
1710       }
1711 
1712       // Try to guess the type of this field.
1713       // If this field is not a message, there should be a ":" between the
1714       // field name and the field value and also the field value should not
1715       // start with "{" or "<" which indicates the beginning of a message body.
1716       // If there is no ":" or there is a "{" or "<" after ":", this field has
1717       // to be a message or the input is ill-formed.
1718       if (tokenizer.tryConsume(":")
1719           && !tokenizer.lookingAt("<")
1720           && !tokenizer.lookingAt("{")) {
1721         skipFieldValue(tokenizer);
1722       } else {
1723         skipFieldMessage(tokenizer);
1724       }
1725       // For historical reasons, fields may optionally be separated by commas or
1726       // semicolons.
1727       if (!tokenizer.tryConsume(";")) {
1728         tokenizer.tryConsume(",");
1729       }
1730     }
1731 
1732     /**
1733      * Skips the whole body of a message including the beginning delimiter and
1734      * the ending delimiter.
1735      */
skipFieldMessage(Tokenizer tokenizer)1736     private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
1737       final String delimiter;
1738       if (tokenizer.tryConsume("<")) {
1739         delimiter = ">";
1740       } else {
1741         tokenizer.consume("{");
1742         delimiter = "}";
1743       }
1744       while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
1745         skipField(tokenizer);
1746       }
1747       tokenizer.consume(delimiter);
1748     }
1749 
1750     /**
1751      * Skips a field value.
1752      */
skipFieldValue(Tokenizer tokenizer)1753     private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
1754       if (tokenizer.tryConsumeString()) {
1755         while (tokenizer.tryConsumeString()) {}
1756         return;
1757       }
1758       if (!tokenizer.tryConsumeIdentifier()   // includes enum & boolean
1759           && !tokenizer.tryConsumeInt64()     // includes int32
1760           && !tokenizer.tryConsumeUInt64()    // includes uint32
1761           && !tokenizer.tryConsumeDouble()
1762           && !tokenizer.tryConsumeFloat()) {
1763         throw tokenizer.parseException(
1764             "Invalid field value: " + tokenizer.currentToken);
1765       }
1766     }
1767   }
1768 
1769   // =================================================================
1770   // Utility functions
1771   //
1772   // Some of these methods are package-private because Descriptors.java uses
1773   // them.
1774 
1775   /**
1776    * Escapes bytes in the format used in protocol buffer text format, which
1777    * is the same as the format used for C string literals.  All bytes
1778    * that are not printable 7-bit ASCII characters are escaped, as well as
1779    * backslash, single-quote, and double-quote characters.  Characters for
1780    * which no defined short-hand escape sequence is defined will be escaped
1781    * using 3-digit octal sequences.
1782    */
escapeBytes(ByteString input)1783   public static String escapeBytes(ByteString input) {
1784     return TextFormatEscaper.escapeBytes(input);
1785   }
1786 
1787   /**
1788    * Like {@link #escapeBytes(ByteString)}, but used for byte array.
1789    */
escapeBytes(byte[] input)1790   public static String escapeBytes(byte[] input) {
1791     return TextFormatEscaper.escapeBytes(input);
1792   }
1793 
1794   /**
1795    * Un-escape a byte sequence as escaped using
1796    * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
1797    * "\x") are also recognized.
1798    */
unescapeBytes(final CharSequence charString)1799   public static ByteString unescapeBytes(final CharSequence charString)
1800       throws InvalidEscapeSequenceException {
1801     // First convert the Java character sequence to UTF-8 bytes.
1802     ByteString input = ByteString.copyFromUtf8(charString.toString());
1803     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
1804     // escapes can all be expressed with ASCII characters, so it is safe to
1805     // operate on bytes here.
1806     //
1807     // Unescaping the input byte array will result in a byte sequence that's no
1808     // longer than the input.  That's because each escape sequence is between
1809     // two and four bytes long and stands for a single byte.
1810     final byte[] result = new byte[input.size()];
1811     int pos = 0;
1812     for (int i = 0; i < input.size(); i++) {
1813       byte c = input.byteAt(i);
1814       if (c == '\\') {
1815         if (i + 1 < input.size()) {
1816           ++i;
1817           c = input.byteAt(i);
1818           if (isOctal(c)) {
1819             // Octal escape.
1820             int code = digitValue(c);
1821             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1822               ++i;
1823               code = code * 8 + digitValue(input.byteAt(i));
1824             }
1825             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1826               ++i;
1827               code = code * 8 + digitValue(input.byteAt(i));
1828             }
1829             // TODO: Check that 0 <= code && code <= 0xFF.
1830             result[pos++] = (byte) code;
1831           } else {
1832             switch (c) {
1833               case 'a' : result[pos++] = 0x07; break;
1834               case 'b' : result[pos++] = '\b'; break;
1835               case 'f' : result[pos++] = '\f'; break;
1836               case 'n' : result[pos++] = '\n'; break;
1837               case 'r' : result[pos++] = '\r'; break;
1838               case 't' : result[pos++] = '\t'; break;
1839               case 'v' : result[pos++] = 0x0b; break;
1840               case '\\': result[pos++] = '\\'; break;
1841               case '\'': result[pos++] = '\''; break;
1842               case '"' : result[pos++] = '\"'; break;
1843 
1844               case 'x':
1845                 // hex escape
1846                 int code = 0;
1847                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1848                   ++i;
1849                   code = digitValue(input.byteAt(i));
1850                 } else {
1851                   throw new InvalidEscapeSequenceException(
1852                       "Invalid escape sequence: '\\x' with no digits");
1853                 }
1854                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1855                   ++i;
1856                   code = code * 16 + digitValue(input.byteAt(i));
1857                 }
1858                 result[pos++] = (byte) code;
1859                 break;
1860 
1861               default:
1862                 throw new InvalidEscapeSequenceException(
1863                     "Invalid escape sequence: '\\" + (char) c + '\'');
1864             }
1865           }
1866         } else {
1867           throw new InvalidEscapeSequenceException(
1868               "Invalid escape sequence: '\\' at end of string.");
1869         }
1870       } else {
1871         result[pos++] = c;
1872       }
1873     }
1874 
1875     return result.length == pos
1876         ? ByteString.wrap(result)  // This reference has not been out of our control.
1877         : ByteString.copyFrom(result, 0, pos);
1878   }
1879 
1880   /**
1881    * Thrown by {@link TextFormat#unescapeBytes} and
1882    * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
1883    */
1884   public static class InvalidEscapeSequenceException extends IOException {
1885     private static final long serialVersionUID = -8164033650142593304L;
1886 
InvalidEscapeSequenceException(final String description)1887     InvalidEscapeSequenceException(final String description) {
1888       super(description);
1889     }
1890   }
1891 
1892   /**
1893    * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
1894    * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
1895    * individually as a 3-digit octal escape.  Yes, it's weird.
1896    */
escapeText(final String input)1897   static String escapeText(final String input) {
1898     return escapeBytes(ByteString.copyFromUtf8(input));
1899   }
1900 
1901   /**
1902    * Escape double quotes and backslashes in a String for unicode output of a message.
1903    */
escapeDoubleQuotesAndBackslashes(final String input)1904   public static String escapeDoubleQuotesAndBackslashes(final String input) {
1905     return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
1906   }
1907 
1908   /**
1909    * Un-escape a text string as escaped using {@link #escapeText(String)}.
1910    * Two-digit hex escapes (starting with "\x") are also recognized.
1911    */
unescapeText(final String input)1912   static String unescapeText(final String input)
1913                              throws InvalidEscapeSequenceException {
1914     return unescapeBytes(input).toStringUtf8();
1915   }
1916 
1917   /** Is this an octal digit? */
isOctal(final byte c)1918   private static boolean isOctal(final byte c) {
1919     return '0' <= c && c <= '7';
1920   }
1921 
1922   /** Is this a hex digit? */
isHex(final byte c)1923   private static boolean isHex(final byte c) {
1924     return ('0' <= c && c <= '9')
1925         || ('a' <= c && c <= 'f')
1926         || ('A' <= c && c <= 'F');
1927   }
1928 
1929   /**
1930    * Interpret a character as a digit (in any base up to 36) and return the
1931    * numeric value.  This is like {@code Character.digit()} but we don't accept
1932    * non-ASCII digits.
1933    */
digitValue(final byte c)1934   private static int digitValue(final byte c) {
1935     if ('0' <= c && c <= '9') {
1936       return c - '0';
1937     } else if ('a' <= c && c <= 'z') {
1938       return c - 'a' + 10;
1939     } else {
1940       return c - 'A' + 10;
1941     }
1942   }
1943 
1944   /**
1945    * Parse a 32-bit signed integer from the text.  Unlike the Java standard
1946    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1947    * and "0" to signify hexadecimal and octal numbers, respectively.
1948    */
parseInt32(final String text)1949   static int parseInt32(final String text) throws NumberFormatException {
1950     return (int) parseInteger(text, true, false);
1951   }
1952 
1953   /**
1954    * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
1955    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1956    * and "0" to signify hexadecimal and octal numbers, respectively.  The
1957    * result is coerced to a (signed) {@code int} when returned since Java has
1958    * no unsigned integer type.
1959    */
parseUInt32(final String text)1960   static int parseUInt32(final String text) throws NumberFormatException {
1961     return (int) parseInteger(text, false, false);
1962   }
1963 
1964   /**
1965    * Parse a 64-bit signed integer from the text.  Unlike the Java standard
1966    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1967    * and "0" to signify hexadecimal and octal numbers, respectively.
1968    */
parseInt64(final String text)1969   static long parseInt64(final String text) throws NumberFormatException {
1970     return parseInteger(text, true, true);
1971   }
1972 
1973   /**
1974    * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
1975    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1976    * and "0" to signify hexadecimal and octal numbers, respectively.  The
1977    * result is coerced to a (signed) {@code long} when returned since Java has
1978    * no unsigned long type.
1979    */
parseUInt64(final String text)1980   static long parseUInt64(final String text) throws NumberFormatException {
1981     return parseInteger(text, false, true);
1982   }
1983 
parseInteger(final String text, final boolean isSigned, final boolean isLong)1984   private static long parseInteger(final String text,
1985                                    final boolean isSigned,
1986                                    final boolean isLong)
1987                                    throws NumberFormatException {
1988     int pos = 0;
1989 
1990     boolean negative = false;
1991     if (text.startsWith("-", pos)) {
1992       if (!isSigned) {
1993         throw new NumberFormatException("Number must be positive: " + text);
1994       }
1995       ++pos;
1996       negative = true;
1997     }
1998 
1999     int radix = 10;
2000     if (text.startsWith("0x", pos)) {
2001       pos += 2;
2002       radix = 16;
2003     } else if (text.startsWith("0", pos)) {
2004       radix = 8;
2005     }
2006 
2007     final String numberText = text.substring(pos);
2008 
2009     long result = 0;
2010     if (numberText.length() < 16) {
2011       // Can safely assume no overflow.
2012       result = Long.parseLong(numberText, radix);
2013       if (negative) {
2014         result = -result;
2015       }
2016 
2017       // Check bounds.
2018       // No need to check for 64-bit numbers since they'd have to be 16 chars
2019       // or longer to overflow.
2020       if (!isLong) {
2021         if (isSigned) {
2022           if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
2023             throw new NumberFormatException(
2024               "Number out of range for 32-bit signed integer: " + text);
2025           }
2026         } else {
2027           if (result >= (1L << 32) || result < 0) {
2028             throw new NumberFormatException(
2029               "Number out of range for 32-bit unsigned integer: " + text);
2030           }
2031         }
2032       }
2033     } else {
2034       BigInteger bigValue = new BigInteger(numberText, radix);
2035       if (negative) {
2036         bigValue = bigValue.negate();
2037       }
2038 
2039       // Check bounds.
2040       if (!isLong) {
2041         if (isSigned) {
2042           if (bigValue.bitLength() > 31) {
2043             throw new NumberFormatException(
2044               "Number out of range for 32-bit signed integer: " + text);
2045           }
2046         } else {
2047           if (bigValue.bitLength() > 32) {
2048             throw new NumberFormatException(
2049               "Number out of range for 32-bit unsigned integer: " + text);
2050           }
2051         }
2052       } else {
2053         if (isSigned) {
2054           if (bigValue.bitLength() > 63) {
2055             throw new NumberFormatException(
2056               "Number out of range for 64-bit signed integer: " + text);
2057           }
2058         } else {
2059           if (bigValue.bitLength() > 64) {
2060             throw new NumberFormatException(
2061               "Number out of range for 64-bit unsigned integer: " + text);
2062           }
2063         }
2064       }
2065 
2066       result = bigValue.longValue();
2067     }
2068 
2069     return result;
2070   }
2071 }
2072