1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <algorithm>
36 #include <float.h>
37 #include <math.h>
38 #include <stdio.h>
39 #include <stack>
40 #include <limits>
41 #include <vector>
42 
43 #include <google/protobuf/text_format.h>
44 
45 #include <google/protobuf/descriptor.h>
46 #include <google/protobuf/wire_format_lite.h>
47 #include <google/protobuf/io/coded_stream.h>
48 #include <google/protobuf/io/zero_copy_stream.h>
49 #include <google/protobuf/io/zero_copy_stream_impl.h>
50 #include <google/protobuf/unknown_field_set.h>
51 #include <google/protobuf/descriptor.pb.h>
52 #include <google/protobuf/io/tokenizer.h>
53 #include <google/protobuf/stubs/strutil.h>
54 #include <google/protobuf/stubs/map_util.h>
55 #include <google/protobuf/stubs/stl_util.h>
56 
57 namespace google {
58 namespace protobuf {
59 
60 namespace {
61 
// Returns true when |str| begins with a hexadecimal prefix, i.e. "0x" or
// "0X".  Only the two-character prefix is inspected; the characters that
// follow it are not validated.
inline bool IsHexNumber(const string& str) {
  if (str.length() < 2 || str[0] != '0') {
    return false;
  }
  const char marker = str[1];
  return marker == 'x' || marker == 'X';
}
66 
// Returns true when |str| looks like an octal literal: a leading '0'
// followed by an octal digit ('0' through '7').  Only the first two
// characters are inspected.
inline bool IsOctNumber(const string& str) {
  if (str.length() < 2 || str[0] != '0') {
    return false;
  }
  const char digit = str[1];
  return digit >= '0' && digit < '8';
}
71 
72 }  // namespace
73 
DebugString() const74 string Message::DebugString() const {
75   string debug_string;
76 
77   TextFormat::PrintToString(*this, &debug_string);
78 
79   return debug_string;
80 }
81 
ShortDebugString() const82 string Message::ShortDebugString() const {
83   string debug_string;
84 
85   TextFormat::Printer printer;
86   printer.SetSingleLineMode(true);
87 
88   printer.PrintToString(*this, &debug_string);
89   // Single line mode currently might have an extra space at the end.
90   if (debug_string.size() > 0 &&
91       debug_string[debug_string.size() - 1] == ' ') {
92     debug_string.resize(debug_string.size() - 1);
93   }
94 
95   return debug_string;
96 }
97 
Utf8DebugString() const98 string Message::Utf8DebugString() const {
99   string debug_string;
100 
101   TextFormat::Printer printer;
102   printer.SetUseUtf8StringEscaping(true);
103 
104   printer.PrintToString(*this, &debug_string);
105 
106   return debug_string;
107 }
108 
PrintDebugString() const109 void Message::PrintDebugString() const {
110   printf("%s", DebugString().c_str());
111 }
112 
113 
114 // ===========================================================================
115 // Implementation of the parse information tree class.
ParseInfoTree()116 TextFormat::ParseInfoTree::ParseInfoTree() { }
117 
~ParseInfoTree()118 TextFormat::ParseInfoTree::~ParseInfoTree() {
119   // Remove any nested information trees, as they are owned by this tree.
120   for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) {
121     STLDeleteElements(&(it->second));
122   }
123 }
124 
RecordLocation(const FieldDescriptor * field,TextFormat::ParseLocation location)125 void TextFormat::ParseInfoTree::RecordLocation(
126     const FieldDescriptor* field,
127     TextFormat::ParseLocation location) {
128   locations_[field].push_back(location);
129 }
130 
CreateNested(const FieldDescriptor * field)131 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
132     const FieldDescriptor* field) {
133   // Owned by us in the map.
134   TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree();
135   vector<TextFormat::ParseInfoTree*>* trees = &nested_[field];
136   GOOGLE_CHECK(trees);
137   trees->push_back(instance);
138   return instance;
139 }
140 
CheckFieldIndex(const FieldDescriptor * field,int index)141 void CheckFieldIndex(const FieldDescriptor* field, int index) {
142   if (field == NULL) { return; }
143 
144   if (field->is_repeated() && index == -1) {
145     GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
146                 << "Field: " << field->name();
147   } else if (!field->is_repeated() && index != -1) {
148     GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
149                 << "Field: " << field->name();
150   }
151 }
152 
GetLocation(const FieldDescriptor * field,int index) const153 TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation(
154     const FieldDescriptor* field, int index) const {
155   CheckFieldIndex(field, index);
156   if (index == -1) { index = 0; }
157 
158   const vector<TextFormat::ParseLocation>* locations =
159       FindOrNull(locations_, field);
160   if (locations == NULL || index >= locations->size()) {
161     return TextFormat::ParseLocation();
162   }
163 
164   return (*locations)[index];
165 }
166 
GetTreeForNested(const FieldDescriptor * field,int index) const167 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
168     const FieldDescriptor* field, int index) const {
169   CheckFieldIndex(field, index);
170   if (index == -1) { index = 0; }
171 
172   const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field);
173   if (trees == NULL || index >= trees->size()) {
174     return NULL;
175   }
176 
177   return (*trees)[index];
178 }
179 
180 
181 // ===========================================================================
182 // Internal class for parsing an ASCII representation of a Protocol Message.
183 // This class makes use of the Protocol Message compiler's tokenizer found
184 // in //google/protobuf/io/tokenizer.h. Note that class's Parse
185 // method is *not* thread-safe and should only be used in a single thread at
186 // a time.
187 
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false. Borrowed from parser.cc (Thanks Kenton!).
//
// The expansion is an if/else pair (empty "then" branch, "return false" in
// the else branch), so the macro already carries its own else.  Because it
// expands to "return false", it can only be used inside functions from
// which false can be returned.
#define DO(STATEMENT) if (STATEMENT) {} else return false
192 
193 class TextFormat::Parser::ParserImpl {
194  public:
195 
  // Determines if repeated values for non-repeated fields and
  // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
  // required/optional field named "foo", or "baz: 1 qux: 2"
  // where "baz" and "qux" are members of the same oneof.
  // ConsumeField() performs the duplicate checks only when the policy is
  // FORBID_SINGULAR_OVERWRITES.
  enum SingularOverwritePolicy {
    ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
    FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
  };
204 
  // Sets up a parser that reads tokens from |input_stream|.
  //
  //   root_message_type          - its full_name() is used in log messages.
  //   error_collector            - receives errors and warnings; when NULL
  //                                they go to GOOGLE_LOG instead.
  //   finder                     - resolves extension names; when NULL the
  //                                message's reflection is asked instead.
  //   parse_info_tree            - receives field locations; may be NULL.
  //   singular_overwrite_policy  - see SingularOverwritePolicy.
  //   allow_case_insensitive_field, allow_unknown_field, allow_unknown_enum,
  //   allow_field_number         - stored as-is and consulted while parsing.
  //   allow_relaxed_whitespace   - when true, the tokenizer stops requiring
  //                                a space after numbers and accepts
  //                                multi-line strings.
  //
  // The constructor also reads the first token, so the parser is positioned
  // on it when construction finishes.
  ParserImpl(const Descriptor* root_message_type,
             io::ZeroCopyInputStream* input_stream,
             io::ErrorCollector* error_collector,
             TextFormat::Finder* finder,
             ParseInfoTree* parse_info_tree,
             SingularOverwritePolicy singular_overwrite_policy,
             bool allow_case_insensitive_field,
             bool allow_unknown_field,
             bool allow_unknown_enum,
             bool allow_field_number,
             bool allow_relaxed_whitespace)
    : error_collector_(error_collector),
      finder_(finder),
      parse_info_tree_(parse_info_tree),
      tokenizer_error_collector_(this),
      tokenizer_(input_stream, &tokenizer_error_collector_),
      root_message_type_(root_message_type),
      singular_overwrite_policy_(singular_overwrite_policy),
      allow_case_insensitive_field_(allow_case_insensitive_field),
      allow_unknown_field_(allow_unknown_field),
      allow_unknown_enum_(allow_unknown_enum),
      allow_field_number_(allow_field_number),
      had_errors_(false) {
    // For backwards-compatibility with proto1, we need to allow the 'f' suffix
    // for floats.
    tokenizer_.set_allow_f_after_float(true);

    // '#' starts a comment.
    tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);

    // Relaxed whitespace: numbers need not be followed by a space and string
    // literals may span several lines.
    if (allow_relaxed_whitespace) {
      tokenizer_.set_require_space_after_number(false);
      tokenizer_.set_allow_multiline_strings(true);
    }

    // Consume the starting token.
    tokenizer_.Next();
  }
~ParserImpl()243   ~ParserImpl() { }
244 
245   // Parses the ASCII representation specified in input and saves the
246   // information into the output pointer (a Message). Returns
247   // false if an error occurs (an error will also be logged to
248   // GOOGLE_LOG(ERROR)).
Parse(Message * output)249   bool Parse(Message* output) {
250     // Consume fields until we cannot do so anymore.
251     while (true) {
252       if (LookingAtType(io::Tokenizer::TYPE_END)) {
253         return !had_errors_;
254       }
255 
256       DO(ConsumeField(output));
257     }
258   }
259 
ParseField(const FieldDescriptor * field,Message * output)260   bool ParseField(const FieldDescriptor* field, Message* output) {
261     bool suc;
262     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
263       suc = ConsumeFieldMessage(output, output->GetReflection(), field);
264     } else {
265       suc = ConsumeFieldValue(output, output->GetReflection(), field);
266     }
267     return suc && LookingAtType(io::Tokenizer::TYPE_END);
268   }
269 
ReportError(int line,int col,const string & message)270   void ReportError(int line, int col, const string& message) {
271     had_errors_ = true;
272     if (error_collector_ == NULL) {
273       if (line >= 0) {
274         GOOGLE_LOG(ERROR) << "Error parsing text-format "
275                    << root_message_type_->full_name()
276                    << ": " << (line + 1) << ":"
277                    << (col + 1) << ": " << message;
278       } else {
279         GOOGLE_LOG(ERROR) << "Error parsing text-format "
280                    << root_message_type_->full_name()
281                    << ": " << message;
282       }
283     } else {
284       error_collector_->AddError(line, col, message);
285     }
286   }
287 
ReportWarning(int line,int col,const string & message)288   void ReportWarning(int line, int col, const string& message) {
289     if (error_collector_ == NULL) {
290       if (line >= 0) {
291         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
292                      << root_message_type_->full_name()
293                      << ": " << (line + 1) << ":"
294                      << (col + 1) << ": " << message;
295       } else {
296         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
297                      << root_message_type_->full_name()
298                      << ": " << message;
299       }
300     } else {
301       error_collector_->AddWarning(line, col, message);
302     }
303   }
304 
305  private:
306   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
307 
308   // Reports an error with the given message with information indicating
309   // the position (as derived from the current token).
ReportError(const string & message)310   void ReportError(const string& message) {
311     ReportError(tokenizer_.current().line, tokenizer_.current().column,
312                 message);
313   }
314 
315   // Reports a warning with the given message with information indicating
316   // the position (as derived from the current token).
ReportWarning(const string & message)317   void ReportWarning(const string& message) {
318     ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
319                   message);
320   }
321 
322   // Consumes the specified message with the given starting delimeter.
323   // This method checks to see that the end delimeter at the conclusion of
324   // the consumption matches the starting delimeter passed in here.
ConsumeMessage(Message * message,const string delimeter)325   bool ConsumeMessage(Message* message, const string delimeter) {
326     while (!LookingAt(">") &&  !LookingAt("}")) {
327       DO(ConsumeField(message));
328     }
329 
330     // Confirm that we have a valid ending delimeter.
331     DO(Consume(delimeter));
332 
333     return true;
334   }
335 
336 
337   // Consumes the current field (as returned by the tokenizer) on the
338   // passed in message.
ConsumeField(Message * message)339   bool ConsumeField(Message* message) {
340     const Reflection* reflection = message->GetReflection();
341     const Descriptor* descriptor = message->GetDescriptor();
342 
343     string field_name;
344 
345     const FieldDescriptor* field = NULL;
346     int start_line = tokenizer_.current().line;
347     int start_column = tokenizer_.current().column;
348 
349     if (TryConsume("[")) {
350       // Extension.
351       DO(ConsumeIdentifier(&field_name));
352       while (TryConsume(".")) {
353         string part;
354         DO(ConsumeIdentifier(&part));
355         field_name += ".";
356         field_name += part;
357       }
358       DO(Consume("]"));
359 
360       field = (finder_ != NULL
361                ? finder_->FindExtension(message, field_name)
362                : reflection->FindKnownExtensionByName(field_name));
363 
364       if (field == NULL) {
365         if (!allow_unknown_field_) {
366           ReportError("Extension \"" + field_name + "\" is not defined or "
367                       "is not an extension of \"" +
368                       descriptor->full_name() + "\".");
369           return false;
370         } else {
371           ReportWarning("Extension \"" + field_name + "\" is not defined or "
372                         "is not an extension of \"" +
373                         descriptor->full_name() + "\".");
374         }
375       }
376     } else {
377       DO(ConsumeIdentifier(&field_name));
378 
379       int32 field_number;
380       if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
381         if (descriptor->IsExtensionNumber(field_number)) {
382           field = reflection->FindKnownExtensionByNumber(field_number);
383         } else {
384           field = descriptor->FindFieldByNumber(field_number);
385         }
386       } else {
387         field = descriptor->FindFieldByName(field_name);
388         // Group names are expected to be capitalized as they appear in the
389         // .proto file, which actually matches their type names, not their
390         // field names.
391         if (field == NULL) {
392           string lower_field_name = field_name;
393           LowerString(&lower_field_name);
394           field = descriptor->FindFieldByName(lower_field_name);
395           // If the case-insensitive match worked but the field is NOT a group,
396           if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
397             field = NULL;
398           }
399         }
400         // Again, special-case group names as described above.
401         if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
402             && field->message_type()->name() != field_name) {
403           field = NULL;
404         }
405 
406         if (field == NULL && allow_case_insensitive_field_) {
407           string lower_field_name = field_name;
408           LowerString(&lower_field_name);
409           field = descriptor->FindFieldByLowercaseName(lower_field_name);
410         }
411       }
412 
413       if (field == NULL) {
414         if (!allow_unknown_field_) {
415           ReportError("Message type \"" + descriptor->full_name() +
416                       "\" has no field named \"" + field_name + "\".");
417           return false;
418         } else {
419           ReportWarning("Message type \"" + descriptor->full_name() +
420                         "\" has no field named \"" + field_name + "\".");
421         }
422       }
423     }
424 
425     // Skips unknown field.
426     if (field == NULL) {
427       GOOGLE_CHECK(allow_unknown_field_);
428       // Try to guess the type of this field.
429       // If this field is not a message, there should be a ":" between the
430       // field name and the field value and also the field value should not
431       // start with "{" or "<" which indicates the begining of a message body.
432       // If there is no ":" or there is a "{" or "<" after ":", this field has
433       // to be a message or the input is ill-formed.
434       if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
435         return SkipFieldValue();
436       } else {
437         return SkipFieldMessage();
438       }
439     }
440 
441     if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
442       // Fail if the field is not repeated and it has already been specified.
443       if (!field->is_repeated() && reflection->HasField(*message, field)) {
444         ReportError("Non-repeated field \"" + field_name +
445                     "\" is specified multiple times.");
446         return false;
447       }
448       // Fail if the field is a member of a oneof and another member has already
449       // been specified.
450       const OneofDescriptor* oneof = field->containing_oneof();
451       if (oneof != NULL && reflection->HasOneof(*message, oneof)) {
452         const FieldDescriptor* other_field =
453             reflection->GetOneofFieldDescriptor(*message, oneof);
454         ReportError("Field \"" + field_name + "\" is specified along with "
455                     "field \"" + other_field->name() + "\", another member "
456                     "of oneof \"" + oneof->name() + "\".");
457         return false;
458       }
459     }
460 
461     // Perform special handling for embedded message types.
462     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
463       // ':' is optional here.
464       TryConsume(":");
465     } else {
466       // ':' is required here.
467       DO(Consume(":"));
468     }
469 
470     if (field->is_repeated() && TryConsume("[")) {
471       // Short repeated format, e.g.  "foo: [1, 2, 3]"
472       while (true) {
473         if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
474           // Perform special handling for embedded message types.
475           DO(ConsumeFieldMessage(message, reflection, field));
476         } else {
477           DO(ConsumeFieldValue(message, reflection, field));
478         }
479         if (TryConsume("]")) {
480           break;
481         }
482         DO(Consume(","));
483       }
484     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
485       DO(ConsumeFieldMessage(message, reflection, field));
486     } else {
487       DO(ConsumeFieldValue(message, reflection, field));
488     }
489 
490     // For historical reasons, fields may optionally be separated by commas or
491     // semicolons.
492     TryConsume(";") || TryConsume(",");
493 
494     if (field->options().deprecated()) {
495       ReportWarning("text format contains deprecated field \""
496                     + field_name + "\"");
497     }
498 
499     // If a parse info tree exists, add the location for the parsed
500     // field.
501     if (parse_info_tree_ != NULL) {
502       RecordLocation(parse_info_tree_, field,
503                      ParseLocation(start_line, start_column));
504     }
505 
506     return true;
507   }
508 
509   // Skips the next field including the field's name and value.
SkipField()510   bool SkipField() {
511     string field_name;
512     if (TryConsume("[")) {
513       // Extension name.
514       DO(ConsumeIdentifier(&field_name));
515       while (TryConsume(".")) {
516         string part;
517         DO(ConsumeIdentifier(&part));
518         field_name += ".";
519         field_name += part;
520       }
521       DO(Consume("]"));
522     } else {
523       DO(ConsumeIdentifier(&field_name));
524     }
525 
526     // Try to guess the type of this field.
527     // If this field is not a message, there should be a ":" between the
528     // field name and the field value and also the field value should not
529     // start with "{" or "<" which indicates the begining of a message body.
530     // If there is no ":" or there is a "{" or "<" after ":", this field has
531     // to be a message or the input is ill-formed.
532     if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
533       DO(SkipFieldValue());
534     } else {
535       DO(SkipFieldMessage());
536     }
537     // For historical reasons, fields may optionally be separated by commas or
538     // semicolons.
539     TryConsume(";") || TryConsume(",");
540     return true;
541   }
542 
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)543   bool ConsumeFieldMessage(Message* message,
544                            const Reflection* reflection,
545                            const FieldDescriptor* field) {
546 
547     // If the parse information tree is not NULL, create a nested one
548     // for the nested message.
549     ParseInfoTree* parent = parse_info_tree_;
550     if (parent != NULL) {
551       parse_info_tree_ = CreateNested(parent, field);
552     }
553 
554     string delimeter;
555     if (TryConsume("<")) {
556       delimeter = ">";
557     } else {
558       DO(Consume("{"));
559       delimeter = "}";
560     }
561 
562     if (field->is_repeated()) {
563       DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
564     } else {
565       DO(ConsumeMessage(reflection->MutableMessage(message, field),
566                         delimeter));
567     }
568 
569     // Reset the parse information tree.
570     parse_info_tree_ = parent;
571     return true;
572   }
573 
574   // Skips the whole body of a message including the begining delimeter and
575   // the ending delimeter.
SkipFieldMessage()576   bool SkipFieldMessage() {
577     string delimeter;
578     if (TryConsume("<")) {
579       delimeter = ">";
580     } else {
581       DO(Consume("{"));
582       delimeter = "}";
583     }
584     while (!LookingAt(">") &&  !LookingAt("}")) {
585       DO(SkipField());
586     }
587     DO(Consume(delimeter));
588     return true;
589   }
590 
ConsumeFieldValue(Message * message,const Reflection * reflection,const FieldDescriptor * field)591   bool ConsumeFieldValue(Message* message,
592                          const Reflection* reflection,
593                          const FieldDescriptor* field) {
594 
595 // Define an easy to use macro for setting fields. This macro checks
596 // to see if the field is repeated (in which case we need to use the Add
597 // methods or not (in which case we need to use the Set methods).
598 #define SET_FIELD(CPPTYPE, VALUE)                                  \
599         if (field->is_repeated()) {                                \
600           reflection->Add##CPPTYPE(message, field, VALUE);         \
601         } else {                                                   \
602           reflection->Set##CPPTYPE(message, field, VALUE);         \
603         }                                                          \
604 
605     switch(field->cpp_type()) {
606       case FieldDescriptor::CPPTYPE_INT32: {
607         int64 value;
608         DO(ConsumeSignedInteger(&value, kint32max));
609         SET_FIELD(Int32, static_cast<int32>(value));
610         break;
611       }
612 
613       case FieldDescriptor::CPPTYPE_UINT32: {
614         uint64 value;
615         DO(ConsumeUnsignedInteger(&value, kuint32max));
616         SET_FIELD(UInt32, static_cast<uint32>(value));
617         break;
618       }
619 
620       case FieldDescriptor::CPPTYPE_INT64: {
621         int64 value;
622         DO(ConsumeSignedInteger(&value, kint64max));
623         SET_FIELD(Int64, value);
624         break;
625       }
626 
627       case FieldDescriptor::CPPTYPE_UINT64: {
628         uint64 value;
629         DO(ConsumeUnsignedInteger(&value, kuint64max));
630         SET_FIELD(UInt64, value);
631         break;
632       }
633 
634       case FieldDescriptor::CPPTYPE_FLOAT: {
635         double value;
636         DO(ConsumeDouble(&value));
637         SET_FIELD(Float, static_cast<float>(value));
638         break;
639       }
640 
641       case FieldDescriptor::CPPTYPE_DOUBLE: {
642         double value;
643         DO(ConsumeDouble(&value));
644         SET_FIELD(Double, value);
645         break;
646       }
647 
648       case FieldDescriptor::CPPTYPE_STRING: {
649         string value;
650         DO(ConsumeString(&value));
651         SET_FIELD(String, value);
652         break;
653       }
654 
655       case FieldDescriptor::CPPTYPE_BOOL: {
656         if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
657           uint64 value;
658           DO(ConsumeUnsignedInteger(&value, 1));
659           SET_FIELD(Bool, value);
660         } else {
661           string value;
662           DO(ConsumeIdentifier(&value));
663           if (value == "true" || value == "True" || value == "t") {
664             SET_FIELD(Bool, true);
665           } else if (value == "false" || value == "False" || value == "f") {
666             SET_FIELD(Bool, false);
667           } else {
668             ReportError("Invalid value for boolean field \"" + field->name()
669                         + "\". Value: \"" + value  + "\".");
670             return false;
671           }
672         }
673         break;
674       }
675 
676       case FieldDescriptor::CPPTYPE_ENUM: {
677         string value;
678         const EnumDescriptor* enum_type = field->enum_type();
679         const EnumValueDescriptor* enum_value = NULL;
680 
681         if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
682           DO(ConsumeIdentifier(&value));
683           // Find the enumeration value.
684           enum_value = enum_type->FindValueByName(value);
685 
686         } else if (LookingAt("-") ||
687                    LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
688           int64 int_value;
689           DO(ConsumeSignedInteger(&int_value, kint32max));
690           value = SimpleItoa(int_value);        // for error reporting
691           enum_value = enum_type->FindValueByNumber(int_value);
692         } else {
693           ReportError("Expected integer or identifier.");
694           return false;
695         }
696 
697         if (enum_value == NULL) {
698           if (!allow_unknown_enum_) {
699             ReportError("Unknown enumeration value of \"" + value  + "\" for "
700                         "field \"" + field->name() + "\".");
701             return false;
702           } else {
703             ReportWarning("Unknown enumeration value of \"" + value  + "\" for "
704                           "field \"" + field->name() + "\".");
705             return true;
706           }
707         }
708 
709         SET_FIELD(Enum, enum_value);
710         break;
711       }
712 
713       case FieldDescriptor::CPPTYPE_MESSAGE: {
714         // We should never get here. Put here instead of a default
715         // so that if new types are added, we get a nice compiler warning.
716         GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
717         break;
718       }
719     }
720 #undef SET_FIELD
721     return true;
722   }
723 
SkipFieldValue()724   bool SkipFieldValue() {
725     if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
726       while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
727         tokenizer_.Next();
728       }
729       return true;
730     }
731     // Possible field values other than string:
732     //   12345        => TYPE_INTEGER
733     //   -12345       => TYPE_SYMBOL + TYPE_INTEGER
734     //   1.2345       => TYPE_FLOAT
735     //   -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
736     //   inf          => TYPE_IDENTIFIER
737     //   -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
738     //   TYPE_INTEGER => TYPE_IDENTIFIER
739     // Divides them into two group, one with TYPE_SYMBOL
740     // and the other without:
741     //   Group one:
742     //     12345        => TYPE_INTEGER
743     //     1.2345       => TYPE_FLOAT
744     //     inf          => TYPE_IDENTIFIER
745     //     TYPE_INTEGER => TYPE_IDENTIFIER
746     //   Group two:
747     //     -12345       => TYPE_SYMBOL + TYPE_INTEGER
748     //     -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
749     //     -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
750     // As we can see, the field value consists of an optional '-' and one of
751     // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
752     bool has_minus = TryConsume("-");
753     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
754         !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
755         !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
756       return false;
757     }
758     // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
759     // value while other combinations all generate valid values.
760     // We check if the value of this combination is valid here.
761     // TYPE_IDENTIFIER after a '-' should be one of the float values listed
762     // below:
763     //   inf, inff, infinity, nan
764     if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
765       string text = tokenizer_.current().text;
766       LowerString(&text);
767       if (text != "inf" &&
768           text != "infinity" &&
769           text != "nan") {
770         ReportError("Invalid float number: " + text);
771         return false;
772       }
773     }
774     tokenizer_.Next();
775     return true;
776   }
777 
778   // Returns true if the current token's text is equal to that specified.
LookingAt(const string & text)779   bool LookingAt(const string& text) {
780     return tokenizer_.current().text == text;
781   }
782 
783   // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)784   bool LookingAtType(io::Tokenizer::TokenType token_type) {
785     return tokenizer_.current().type == token_type;
786   }
787 
788   // Consumes an identifier and saves its value in the identifier parameter.
789   // Returns false if the token is not of type IDENTFIER.
ConsumeIdentifier(string * identifier)790   bool ConsumeIdentifier(string* identifier) {
791     if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
792       *identifier = tokenizer_.current().text;
793       tokenizer_.Next();
794       return true;
795     }
796 
797     // If allow_field_numer_ or allow_unknown_field_ is true, we should able
798     // to parse integer identifiers.
799     if ((allow_field_number_ || allow_unknown_field_)
800         && LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
801       *identifier = tokenizer_.current().text;
802       tokenizer_.Next();
803       return true;
804     }
805 
806     ReportError("Expected identifier.");
807     return false;
808   }
809 
810   // Consumes a string and saves its value in the text parameter.
811   // Returns false if the token is not of type STRING.
ConsumeString(string * text)812   bool ConsumeString(string* text) {
813     if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
814       ReportError("Expected string.");
815       return false;
816     }
817 
818     text->clear();
819     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
820       io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
821 
822       tokenizer_.Next();
823     }
824 
825     return true;
826   }
827 
828   // Consumes a uint64 and saves its value in the value parameter.
829   // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64 * value,uint64 max_value)830   bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
831     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
832       ReportError("Expected integer.");
833       return false;
834     }
835 
836     if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
837                                      max_value, value)) {
838       ReportError("Integer out of range.");
839       return false;
840     }
841 
842     tokenizer_.Next();
843     return true;
844   }
845 
846   // Consumes an int64 and saves its value in the value parameter.
847   // Note that since the tokenizer does not support negative numbers,
848   // we actually may consume an additional token (for the minus sign) in this
849   // method. Returns false if the token is not an integer
850   // (signed or otherwise).
ConsumeSignedInteger(int64 * value,uint64 max_value)851   bool ConsumeSignedInteger(int64* value, uint64 max_value) {
852     bool negative = false;
853 
854     if (TryConsume("-")) {
855       negative = true;
856       // Two's complement always allows one more negative integer than
857       // positive.
858       ++max_value;
859     }
860 
861     uint64 unsigned_value;
862 
863     DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
864 
865     *value = static_cast<int64>(unsigned_value);
866 
867     if (negative) {
868       *value = -*value;
869     }
870 
871     return true;
872   }
873 
874   // Consumes a uint64 and saves its value in the value parameter.
875   // Accepts decimal numbers only, rejects hex or oct numbers.
ConsumeUnsignedDecimalInteger(uint64 * value,uint64 max_value)876   bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) {
877     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
878       ReportError("Expected integer.");
879       return false;
880     }
881 
882     const string& text = tokenizer_.current().text;
883     if (IsHexNumber(text) || IsOctNumber(text)) {
884       ReportError("Expect a decimal number.");
885       return false;
886     }
887 
888     if (!io::Tokenizer::ParseInteger(text, max_value, value)) {
889       ReportError("Integer out of range.");
890       return false;
891     }
892 
893     tokenizer_.Next();
894     return true;
895   }
896 
897   // Consumes a double and saves its value in the value parameter.
898   // Note that since the tokenizer does not support negative numbers,
899   // we actually may consume an additional token (for the minus sign) in this
900   // method. Returns false if the token is not a double
901   // (signed or otherwise).
ConsumeDouble(double * value)902   bool ConsumeDouble(double* value) {
903     bool negative = false;
904 
905     if (TryConsume("-")) {
906       negative = true;
907     }
908 
909     // A double can actually be an integer, according to the tokenizer.
910     // Therefore, we must check both cases here.
911     if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
912       // We have found an integer value for the double.
913       uint64 integer_value;
914       DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max));
915 
916       *value = static_cast<double>(integer_value);
917     } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
918       // We have found a float value for the double.
919       *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
920 
921       // Mark the current token as consumed.
922       tokenizer_.Next();
923     } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
924       string text = tokenizer_.current().text;
925       LowerString(&text);
926       if (text == "inf" ||
927           text == "infinity") {
928         *value = std::numeric_limits<double>::infinity();
929         tokenizer_.Next();
930       } else if (text == "nan") {
931         *value = std::numeric_limits<double>::quiet_NaN();
932         tokenizer_.Next();
933       } else {
934         ReportError("Expected double.");
935         return false;
936       }
937     } else {
938       ReportError("Expected double.");
939       return false;
940     }
941 
942     if (negative) {
943       *value = -*value;
944     }
945 
946     return true;
947   }
948 
949   // Consumes a token and confirms that it matches that specified in the
950   // value parameter. Returns false if the token found does not match that
951   // which was specified.
Consume(const string & value)952   bool Consume(const string& value) {
953     const string& current_value = tokenizer_.current().text;
954 
955     if (current_value != value) {
956       ReportError("Expected \"" + value + "\", found \"" + current_value
957                   + "\".");
958       return false;
959     }
960 
961     tokenizer_.Next();
962 
963     return true;
964   }
965 
966   // Attempts to consume the supplied value. Returns false if a the
967   // token found does not match the value specified.
TryConsume(const string & value)968   bool TryConsume(const string& value) {
969     if (tokenizer_.current().text == value) {
970       tokenizer_.Next();
971       return true;
972     } else {
973       return false;
974     }
975   }
976 
977   // An internal instance of the Tokenizer's error collector, used to
978   // collect any base-level parse errors and feed them to the ParserImpl.
979   class ParserErrorCollector : public io::ErrorCollector {
980    public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)981     explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
982         parser_(parser) { }
983 
~ParserErrorCollector()984     virtual ~ParserErrorCollector() { }
985 
AddError(int line,int column,const string & message)986     virtual void AddError(int line, int column, const string& message) {
987       parser_->ReportError(line, column, message);
988     }
989 
AddWarning(int line,int column,const string & message)990     virtual void AddWarning(int line, int column, const string& message) {
991       parser_->ReportWarning(line, column, message);
992     }
993 
994    private:
995     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
996     TextFormat::Parser::ParserImpl* parser_;
997   };
998 
999   io::ErrorCollector* error_collector_;
1000   TextFormat::Finder* finder_;
1001   ParseInfoTree* parse_info_tree_;
1002   ParserErrorCollector tokenizer_error_collector_;
1003   io::Tokenizer tokenizer_;
1004   const Descriptor* root_message_type_;
1005   SingularOverwritePolicy singular_overwrite_policy_;
1006   const bool allow_case_insensitive_field_;
1007   const bool allow_unknown_field_;
1008   const bool allow_unknown_enum_;
1009   const bool allow_field_number_;
1010   bool had_errors_;
1011 };
1012 
1013 #undef DO
1014 
1015 // ===========================================================================
1016 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1017 // from the Printer found in //google/protobuf/io/printer.h
1018 class TextFormat::Printer::TextGenerator {
1019  public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)1020   explicit TextGenerator(io::ZeroCopyOutputStream* output,
1021                          int initial_indent_level)
1022     : output_(output),
1023       buffer_(NULL),
1024       buffer_size_(0),
1025       at_start_of_line_(true),
1026       failed_(false),
1027       indent_(""),
1028       initial_indent_level_(initial_indent_level) {
1029     indent_.resize(initial_indent_level_ * 2, ' ');
1030   }
1031 
~TextGenerator()1032   ~TextGenerator() {
1033     // Only BackUp() if we're sure we've successfully called Next() at least
1034     // once.
1035     if (!failed_ && buffer_size_ > 0) {
1036       output_->BackUp(buffer_size_);
1037     }
1038   }
1039 
1040   // Indent text by two spaces.  After calling Indent(), two spaces will be
1041   // inserted at the beginning of each line of text.  Indent() may be called
1042   // multiple times to produce deeper indents.
Indent()1043   void Indent() {
1044     indent_ += "  ";
1045   }
1046 
1047   // Reduces the current indent level by two spaces, or crashes if the indent
1048   // level is zero.
Outdent()1049   void Outdent() {
1050     if (indent_.empty() ||
1051         indent_.size() < initial_indent_level_ * 2) {
1052       GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
1053       return;
1054     }
1055 
1056     indent_.resize(indent_.size() - 2);
1057   }
1058 
1059   // Print text to the output stream.
Print(const string & str)1060   void Print(const string& str) {
1061     Print(str.data(), str.size());
1062   }
1063 
1064   // Print text to the output stream.
Print(const char * text)1065   void Print(const char* text) {
1066     Print(text, strlen(text));
1067   }
1068 
1069   // Print text to the output stream.
Print(const char * text,int size)1070   void Print(const char* text, int size) {
1071     int pos = 0;  // The number of bytes we've written so far.
1072 
1073     for (int i = 0; i < size; i++) {
1074       if (text[i] == '\n') {
1075         // Saw newline.  If there is more text, we may need to insert an indent
1076         // here.  So, write what we have so far, including the '\n'.
1077         Write(text + pos, i - pos + 1);
1078         pos = i + 1;
1079 
1080         // Setting this true will cause the next Write() to insert an indent
1081         // first.
1082         at_start_of_line_ = true;
1083       }
1084     }
1085 
1086     // Write the rest.
1087     Write(text + pos, size - pos);
1088   }
1089 
1090   // True if any write to the underlying stream failed.  (We don't just
1091   // crash in this case because this is an I/O failure, not a programming
1092   // error.)
failed() const1093   bool failed() const { return failed_; }
1094 
1095  private:
1096   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
1097 
Write(const char * data,int size)1098   void Write(const char* data, int size) {
1099     if (failed_) return;
1100     if (size == 0) return;
1101 
1102     if (at_start_of_line_) {
1103       // Insert an indent.
1104       at_start_of_line_ = false;
1105       Write(indent_.data(), indent_.size());
1106       if (failed_) return;
1107     }
1108 
1109     while (size > buffer_size_) {
1110       // Data exceeds space in the buffer.  Copy what we can and request a
1111       // new buffer.
1112       memcpy(buffer_, data, buffer_size_);
1113       data += buffer_size_;
1114       size -= buffer_size_;
1115       void* void_buffer;
1116       failed_ = !output_->Next(&void_buffer, &buffer_size_);
1117       if (failed_) return;
1118       buffer_ = reinterpret_cast<char*>(void_buffer);
1119     }
1120 
1121     // Buffer is big enough to receive the data; copy it.
1122     memcpy(buffer_, data, size);
1123     buffer_ += size;
1124     buffer_size_ -= size;
1125   }
1126 
1127   io::ZeroCopyOutputStream* const output_;
1128   char* buffer_;
1129   int buffer_size_;
1130   bool at_start_of_line_;
1131   bool failed_;
1132 
1133   string indent_;
1134   int initial_indent_level_;
1135 };
1136 
1137 // ===========================================================================
1138 
~Finder()1139 TextFormat::Finder::~Finder() {
1140 }
1141 
Parser()1142 TextFormat::Parser::Parser()
1143   : error_collector_(NULL),
1144     finder_(NULL),
1145     parse_info_tree_(NULL),
1146     allow_partial_(false),
1147     allow_case_insensitive_field_(false),
1148     allow_unknown_field_(false),
1149     allow_unknown_enum_(false),
1150     allow_field_number_(false),
1151     allow_relaxed_whitespace_(false),
1152     allow_singular_overwrites_(false) {
1153 }
1154 
~Parser()1155 TextFormat::Parser::~Parser() {}
1156 
Parse(io::ZeroCopyInputStream * input,Message * output)1157 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1158                                Message* output) {
1159   output->Clear();
1160 
1161   ParserImpl::SingularOverwritePolicy overwrites_policy =
1162       allow_singular_overwrites_
1163       ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1164       : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1165 
1166   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1167                     finder_, parse_info_tree_,
1168                     overwrites_policy,
1169                     allow_case_insensitive_field_, allow_unknown_field_,
1170                     allow_unknown_enum_, allow_field_number_,
1171                     allow_relaxed_whitespace_);
1172   return MergeUsingImpl(input, output, &parser);
1173 }
1174 
ParseFromString(const string & input,Message * output)1175 bool TextFormat::Parser::ParseFromString(const string& input,
1176                                          Message* output) {
1177   io::ArrayInputStream input_stream(input.data(), input.size());
1178   return Parse(&input_stream, output);
1179 }
1180 
Merge(io::ZeroCopyInputStream * input,Message * output)1181 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1182                                Message* output) {
1183   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1184                     finder_, parse_info_tree_,
1185                     ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1186                     allow_case_insensitive_field_, allow_unknown_field_,
1187                     allow_unknown_enum_, allow_field_number_,
1188                     allow_relaxed_whitespace_);
1189   return MergeUsingImpl(input, output, &parser);
1190 }
1191 
MergeFromString(const string & input,Message * output)1192 bool TextFormat::Parser::MergeFromString(const string& input,
1193                                          Message* output) {
1194   io::ArrayInputStream input_stream(input.data(), input.size());
1195   return Merge(&input_stream, output);
1196 }
1197 
MergeUsingImpl(io::ZeroCopyInputStream *,Message * output,ParserImpl * parser_impl)1198 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1199                                         Message* output,
1200                                         ParserImpl* parser_impl) {
1201   if (!parser_impl->Parse(output)) return false;
1202   if (!allow_partial_ && !output->IsInitialized()) {
1203     vector<string> missing_fields;
1204     output->FindInitializationErrors(&missing_fields);
1205     parser_impl->ReportError(-1, 0, "Message missing required fields: " +
1206                                         Join(missing_fields, ", "));
1207     return false;
1208   }
1209   return true;
1210 }
1211 
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * output)1212 bool TextFormat::Parser::ParseFieldValueFromString(
1213     const string& input,
1214     const FieldDescriptor* field,
1215     Message* output) {
1216   io::ArrayInputStream input_stream(input.data(), input.size());
1217   ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
1218                     finder_, parse_info_tree_,
1219                     ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1220                     allow_case_insensitive_field_, allow_unknown_field_,
1221                     allow_unknown_enum_, allow_field_number_,
1222                     allow_relaxed_whitespace_);
1223   return parser.ParseField(field, output);
1224 }
1225 
Parse(io::ZeroCopyInputStream * input,Message * output)1226 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1227                                     Message* output) {
1228   return Parser().Parse(input, output);
1229 }
1230 
Merge(io::ZeroCopyInputStream * input,Message * output)1231 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1232                                     Message* output) {
1233   return Parser().Merge(input, output);
1234 }
1235 
ParseFromString(const string & input,Message * output)1236 /* static */ bool TextFormat::ParseFromString(const string& input,
1237                                               Message* output) {
1238   return Parser().ParseFromString(input, output);
1239 }
1240 
MergeFromString(const string & input,Message * output)1241 /* static */ bool TextFormat::MergeFromString(const string& input,
1242                                               Message* output) {
1243   return Parser().MergeFromString(input, output);
1244 }
1245 
1246 // ===========================================================================
1247 
1248 // The default implementation for FieldValuePrinter. The base class just
1249 // does simple formatting. That way, deriving classes could decide to fallback
1250 // to that behavior.
FieldValuePrinter()1251 TextFormat::FieldValuePrinter::FieldValuePrinter() {}
~FieldValuePrinter()1252 TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
PrintBool(bool val) const1253 string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1254   return val ? "true" : "false";
1255 }
PrintInt32(int32 val) const1256 string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const {
1257   return SimpleItoa(val);
1258 }
PrintUInt32(uint32 val) const1259 string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const {
1260   return SimpleItoa(val);
1261 }
PrintInt64(int64 val) const1262 string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const {
1263   return SimpleItoa(val);
1264 }
PrintUInt64(uint64 val) const1265 string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const {
1266   return SimpleItoa(val);
1267 }
PrintFloat(float val) const1268 string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1269   return SimpleFtoa(val);
1270 }
PrintDouble(double val) const1271 string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1272   return SimpleDtoa(val);
1273 }
// Formats a string value as its CEscape()d form wrapped in double quotes.
string TextFormat::FieldValuePrinter::PrintString(const string& val) const {
  const string escaped = CEscape(val);
  return StrCat("\"", escaped, "\"");
}
// Bytes values are formatted exactly like strings: delegates to
// PrintString().
string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const {
  string formatted = PrintString(val);
  return formatted;
}
PrintEnum(int32 val,const string & name) const1280 string TextFormat::FieldValuePrinter::PrintEnum(int32 val,
1281                                                 const string& name) const {
1282   return name;
1283 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field) const1284 string TextFormat::FieldValuePrinter::PrintFieldName(
1285     const Message& message,
1286     const Reflection* reflection,
1287     const FieldDescriptor* field) const {
1288   if (field->is_extension()) {
1289     // We special-case MessageSet elements for compatibility with proto1.
1290     if (field->containing_type()->options().message_set_wire_format()
1291         && field->type() == FieldDescriptor::TYPE_MESSAGE
1292         && field->is_optional()
1293         && field->extension_scope() == field->message_type()) {
1294       return StrCat("[", field->message_type()->full_name(), "]");
1295     } else {
1296       return StrCat("[", field->full_name(), "]");
1297     }
1298   } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
1299     // Groups must be serialized with their original capitalization.
1300     return field->message_type()->name();
1301   } else {
1302     return field->name();
1303   }
1304 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode) const1305 string TextFormat::FieldValuePrinter::PrintMessageStart(
1306     const Message& message,
1307     int field_index,
1308     int field_count,
1309     bool single_line_mode) const {
1310   return single_line_mode ? " { " : " {\n";
1311 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode) const1312 string TextFormat::FieldValuePrinter::PrintMessageEnd(
1313     const Message& message,
1314     int field_index,
1315     int field_count,
1316     bool single_line_mode) const {
1317   return single_line_mode ? "} " : "}\n";
1318 }
1319 
1320 namespace {
1321 // Our own specialization: for UTF8 escaped strings.
1322 class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter {
1323  public:
PrintString(const string & val) const1324   virtual string PrintString(const string& val) const {
1325     return StrCat("\"", strings::Utf8SafeCEscape(val), "\"");
1326   }
PrintBytes(const string & val) const1327   virtual string PrintBytes(const string& val) const {
1328     return TextFormat::FieldValuePrinter::PrintString(val);
1329   }
1330 };
1331 
1332 }  // namespace
1333 
Printer()1334 TextFormat::Printer::Printer()
1335   : initial_indent_level_(0),
1336     single_line_mode_(false),
1337     use_field_number_(false),
1338     use_short_repeated_primitives_(false),
1339     hide_unknown_fields_(false),
1340     print_message_fields_in_index_order_(false) {
1341   SetUseUtf8StringEscaping(false);
1342 }
1343 
~Printer()1344 TextFormat::Printer::~Printer() {
1345   STLDeleteValues(&custom_printers_);
1346 }
1347 
SetUseUtf8StringEscaping(bool as_utf8)1348 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
1349   SetDefaultFieldValuePrinter(as_utf8
1350                               ? new FieldValuePrinterUtf8Escaping()
1351                               : new FieldValuePrinter());
1352 }
1353 
SetDefaultFieldValuePrinter(const FieldValuePrinter * printer)1354 void TextFormat::Printer::SetDefaultFieldValuePrinter(
1355     const FieldValuePrinter* printer) {
1356   default_field_value_printer_.reset(printer);
1357 }
1358 
RegisterFieldValuePrinter(const FieldDescriptor * field,const FieldValuePrinter * printer)1359 bool TextFormat::Printer::RegisterFieldValuePrinter(
1360     const FieldDescriptor* field,
1361     const FieldValuePrinter* printer) {
1362   return field != NULL
1363       && printer != NULL
1364       && custom_printers_.insert(make_pair(field, printer)).second;
1365 }
1366 
PrintToString(const Message & message,string * output) const1367 bool TextFormat::Printer::PrintToString(const Message& message,
1368                                         string* output) const {
1369   GOOGLE_DCHECK(output) << "output specified is NULL";
1370 
1371   output->clear();
1372   io::StringOutputStream output_stream(output);
1373 
1374   return Print(message, &output_stream);
1375 }
1376 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output) const1377 bool TextFormat::Printer::PrintUnknownFieldsToString(
1378     const UnknownFieldSet& unknown_fields,
1379     string* output) const {
1380   GOOGLE_DCHECK(output) << "output specified is NULL";
1381 
1382   output->clear();
1383   io::StringOutputStream output_stream(output);
1384   return PrintUnknownFields(unknown_fields, &output_stream);
1385 }
1386 
Print(const Message & message,io::ZeroCopyOutputStream * output) const1387 bool TextFormat::Printer::Print(const Message& message,
1388                                 io::ZeroCopyOutputStream* output) const {
1389   TextGenerator generator(output, initial_indent_level_);
1390 
1391   Print(message, generator);
1392 
1393   // Output false if the generator failed internally.
1394   return !generator.failed();
1395 }
1396 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output) const1397 bool TextFormat::Printer::PrintUnknownFields(
1398     const UnknownFieldSet& unknown_fields,
1399     io::ZeroCopyOutputStream* output) const {
1400   TextGenerator generator(output, initial_indent_level_);
1401 
1402   PrintUnknownFields(unknown_fields, generator);
1403 
1404   // Output false if the generator failed internally.
1405   return !generator.failed();
1406 }
1407 
1408 namespace {
1409 // Comparison functor for sorting FieldDescriptors by field index.
1410 struct FieldIndexSorter {
operator ()google::protobuf::__anon395507550311::FieldIndexSorter1411   bool operator()(const FieldDescriptor* left,
1412                   const FieldDescriptor* right) const {
1413     return left->index() < right->index();
1414   }
1415 };
1416 }  // namespace
1417 
Print(const Message & message,TextGenerator & generator) const1418 void TextFormat::Printer::Print(const Message& message,
1419                                 TextGenerator& generator) const {
1420   const Reflection* reflection = message.GetReflection();
1421   vector<const FieldDescriptor*> fields;
1422   reflection->ListFields(message, &fields);
1423   if (print_message_fields_in_index_order_) {
1424     sort(fields.begin(), fields.end(), FieldIndexSorter());
1425   }
1426   for (int i = 0; i < fields.size(); i++) {
1427     PrintField(message, reflection, fields[i], generator);
1428   }
1429   if (!hide_unknown_fields_) {
1430     PrintUnknownFields(reflection->GetUnknownFields(message), generator);
1431   }
1432 }
1433 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output) const1434 void TextFormat::Printer::PrintFieldValueToString(
1435     const Message& message,
1436     const FieldDescriptor* field,
1437     int index,
1438     string* output) const {
1439 
1440   GOOGLE_DCHECK(output) << "output specified is NULL";
1441 
1442   output->clear();
1443   io::StringOutputStream output_stream(output);
1444   TextGenerator generator(&output_stream, initial_indent_level_);
1445 
1446   PrintFieldValue(message, message.GetReflection(), field, index, generator);
1447 }
1448 
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator) const1449 void TextFormat::Printer::PrintField(const Message& message,
1450                                      const Reflection* reflection,
1451                                      const FieldDescriptor* field,
1452                                      TextGenerator& generator) const {
1453   if (use_short_repeated_primitives_ &&
1454       field->is_repeated() &&
1455       field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
1456       field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
1457     PrintShortRepeatedField(message, reflection, field, generator);
1458     return;
1459   }
1460 
1461   int count = 0;
1462 
1463   if (field->is_repeated()) {
1464     count = reflection->FieldSize(message, field);
1465   } else if (reflection->HasField(message, field)) {
1466     count = 1;
1467   }
1468 
1469   for (int j = 0; j < count; ++j) {
1470     const int field_index = field->is_repeated() ? j : -1;
1471 
1472     PrintFieldName(message, reflection, field, generator);
1473 
1474     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1475       const FieldValuePrinter* printer = FindWithDefault(
1476           custom_printers_, field, default_field_value_printer_.get());
1477       const Message& sub_message =
1478               field->is_repeated()
1479               ? reflection->GetRepeatedMessage(message, field, j)
1480               : reflection->GetMessage(message, field);
1481       generator.Print(
1482           printer->PrintMessageStart(
1483               sub_message, field_index, count, single_line_mode_));
1484       generator.Indent();
1485       Print(sub_message, generator);
1486       generator.Outdent();
1487       generator.Print(
1488           printer->PrintMessageEnd(
1489               sub_message, field_index, count, single_line_mode_));
1490     } else {
1491       generator.Print(": ");
1492       // Write the field value.
1493       PrintFieldValue(message, reflection, field, field_index, generator);
1494       if (single_line_mode_) {
1495         generator.Print(" ");
1496       } else {
1497         generator.Print("\n");
1498       }
1499     }
1500   }
1501 }
1502 
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator) const1503 void TextFormat::Printer::PrintShortRepeatedField(
1504     const Message& message,
1505     const Reflection* reflection,
1506     const FieldDescriptor* field,
1507     TextGenerator& generator) const {
1508   // Print primitive repeated field in short form.
1509   PrintFieldName(message, reflection, field, generator);
1510 
1511   int size = reflection->FieldSize(message, field);
1512   generator.Print(": [");
1513   for (int i = 0; i < size; i++) {
1514     if (i > 0) generator.Print(", ");
1515     PrintFieldValue(message, reflection, field, i, generator);
1516   }
1517   if (single_line_mode_) {
1518     generator.Print("] ");
1519   } else {
1520     generator.Print("]\n");
1521   }
1522 }
1523 
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator) const1524 void TextFormat::Printer::PrintFieldName(const Message& message,
1525                                          const Reflection* reflection,
1526                                          const FieldDescriptor* field,
1527                                          TextGenerator& generator) const {
1528   // if use_field_number_ is true, prints field number instead
1529   // of field name.
1530   if (use_field_number_) {
1531     generator.Print(SimpleItoa(field->number()));
1532     return;
1533   }
1534 
1535   const FieldValuePrinter* printer = FindWithDefault(
1536       custom_printers_, field, default_field_value_printer_.get());
1537   generator.Print(printer->PrintFieldName(message, reflection, field));
1538 }
1539 
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,TextGenerator & generator) const1540 void TextFormat::Printer::PrintFieldValue(
1541     const Message& message,
1542     const Reflection* reflection,
1543     const FieldDescriptor* field,
1544     int index,
1545     TextGenerator& generator) const {
1546   GOOGLE_DCHECK(field->is_repeated() || (index == -1))
1547       << "Index must be -1 for non-repeated fields";
1548 
1549   const FieldValuePrinter* printer
1550       = FindWithDefault(custom_printers_, field,
1551                         default_field_value_printer_.get());
1552 
1553   switch (field->cpp_type()) {
1554 #define OUTPUT_FIELD(CPPTYPE, METHOD)                                   \
1555     case FieldDescriptor::CPPTYPE_##CPPTYPE:                            \
1556       generator.Print(printer->Print##METHOD(field->is_repeated()       \
1557                ? reflection->GetRepeated##METHOD(message, field, index) \
1558                : reflection->Get##METHOD(message, field)));             \
1559         break
1560 
1561     OUTPUT_FIELD( INT32,  Int32);
1562     OUTPUT_FIELD( INT64,  Int64);
1563     OUTPUT_FIELD(UINT32, UInt32);
1564     OUTPUT_FIELD(UINT64, UInt64);
1565     OUTPUT_FIELD( FLOAT,  Float);
1566     OUTPUT_FIELD(DOUBLE, Double);
1567     OUTPUT_FIELD(  BOOL,   Bool);
1568 #undef OUTPUT_FIELD
1569 
1570     case FieldDescriptor::CPPTYPE_STRING: {
1571       string scratch;
1572       const string& value = field->is_repeated()
1573           ? reflection->GetRepeatedStringReference(
1574               message, field, index, &scratch)
1575           : reflection->GetStringReference(message, field, &scratch);
1576       if (field->type() == FieldDescriptor::TYPE_STRING) {
1577         generator.Print(printer->PrintString(value));
1578       } else {
1579         GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
1580         generator.Print(printer->PrintBytes(value));
1581       }
1582       break;
1583     }
1584 
1585     case FieldDescriptor::CPPTYPE_ENUM: {
1586       const EnumValueDescriptor *enum_val = field->is_repeated()
1587           ? reflection->GetRepeatedEnum(message, field, index)
1588           : reflection->GetEnum(message, field);
1589       generator.Print(printer->PrintEnum(enum_val->number(), enum_val->name()));
1590       break;
1591     }
1592 
1593     case FieldDescriptor::CPPTYPE_MESSAGE:
1594       Print(field->is_repeated()
1595             ? reflection->GetRepeatedMessage(message, field, index)
1596             : reflection->GetMessage(message, field),
1597             generator);
1598       break;
1599   }
1600 }
1601 
Print(const Message & message,io::ZeroCopyOutputStream * output)1602 /* static */ bool TextFormat::Print(const Message& message,
1603                                     io::ZeroCopyOutputStream* output) {
1604   return Printer().Print(message, output);
1605 }
1606 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)1607 /* static */ bool TextFormat::PrintUnknownFields(
1608     const UnknownFieldSet& unknown_fields,
1609     io::ZeroCopyOutputStream* output) {
1610   return Printer().PrintUnknownFields(unknown_fields, output);
1611 }
1612 
PrintToString(const Message & message,string * output)1613 /* static */ bool TextFormat::PrintToString(
1614     const Message& message, string* output) {
1615   return Printer().PrintToString(message, output);
1616 }
1617 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output)1618 /* static */ bool TextFormat::PrintUnknownFieldsToString(
1619     const UnknownFieldSet& unknown_fields, string* output) {
1620   return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1621 }
1622 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output)1623 /* static */ void TextFormat::PrintFieldValueToString(
1624     const Message& message,
1625     const FieldDescriptor* field,
1626     int index,
1627     string* output) {
1628   return Printer().PrintFieldValueToString(message, field, index, output);
1629 }
1630 
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * message)1631 /* static */ bool TextFormat::ParseFieldValueFromString(
1632     const string& input,
1633     const FieldDescriptor* field,
1634     Message* message) {
1635   return Parser().ParseFieldValueFromString(input, field, message);
1636 }
1637 
1638 // Prints an integer as hex with a fixed number of digits dependent on the
1639 // integer type.
1640 template<typename IntType>
PaddedHex(IntType value)1641 static string PaddedHex(IntType value) {
1642   string result;
1643   result.reserve(sizeof(value) * 2);
1644   for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1645     result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1646   }
1647   return result;
1648 }
1649 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,TextGenerator & generator) const1650 void TextFormat::Printer::PrintUnknownFields(
1651     const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
1652   for (int i = 0; i < unknown_fields.field_count(); i++) {
1653     const UnknownField& field = unknown_fields.field(i);
1654     string field_number = SimpleItoa(field.number());
1655 
1656     switch (field.type()) {
1657       case UnknownField::TYPE_VARINT:
1658         generator.Print(field_number);
1659         generator.Print(": ");
1660         generator.Print(SimpleItoa(field.varint()));
1661         if (single_line_mode_) {
1662           generator.Print(" ");
1663         } else {
1664           generator.Print("\n");
1665         }
1666         break;
1667       case UnknownField::TYPE_FIXED32: {
1668         generator.Print(field_number);
1669         generator.Print(": 0x");
1670         char buffer[kFastToBufferSize];
1671         generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
1672         if (single_line_mode_) {
1673           generator.Print(" ");
1674         } else {
1675           generator.Print("\n");
1676         }
1677         break;
1678       }
1679       case UnknownField::TYPE_FIXED64: {
1680         generator.Print(field_number);
1681         generator.Print(": 0x");
1682         char buffer[kFastToBufferSize];
1683         generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
1684         if (single_line_mode_) {
1685           generator.Print(" ");
1686         } else {
1687           generator.Print("\n");
1688         }
1689         break;
1690       }
1691       case UnknownField::TYPE_LENGTH_DELIMITED: {
1692         generator.Print(field_number);
1693         const string& value = field.length_delimited();
1694         UnknownFieldSet embedded_unknown_fields;
1695         if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1696           // This field is parseable as a Message.
1697           // So it is probably an embedded message.
1698           if (single_line_mode_) {
1699             generator.Print(" { ");
1700           } else {
1701             generator.Print(" {\n");
1702             generator.Indent();
1703           }
1704           PrintUnknownFields(embedded_unknown_fields, generator);
1705           if (single_line_mode_) {
1706             generator.Print("} ");
1707           } else {
1708             generator.Outdent();
1709             generator.Print("}\n");
1710           }
1711         } else {
1712           // This field is not parseable as a Message.
1713           // So it is probably just a plain string.
1714           generator.Print(": \"");
1715           generator.Print(CEscape(value));
1716           generator.Print("\"");
1717           if (single_line_mode_) {
1718             generator.Print(" ");
1719           } else {
1720             generator.Print("\n");
1721           }
1722         }
1723         break;
1724       }
1725       case UnknownField::TYPE_GROUP:
1726         generator.Print(field_number);
1727         if (single_line_mode_) {
1728           generator.Print(" { ");
1729         } else {
1730           generator.Print(" {\n");
1731           generator.Indent();
1732         }
1733         PrintUnknownFields(field.group(), generator);
1734         if (single_line_mode_) {
1735           generator.Print("} ");
1736         } else {
1737           generator.Outdent();
1738           generator.Print("}\n");
1739         }
1740         break;
1741     }
1742   }
1743 }
1744 
1745 }  // namespace protobuf
1746 }  // namespace google
1747