1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <algorithm>
36 #include <float.h>
37 #include <math.h>
38 #include <stdio.h>
39 #include <stack>
40 #include <limits>
41 #include <vector>
42
43 #include <google/protobuf/text_format.h>
44
45 #include <google/protobuf/descriptor.h>
46 #include <google/protobuf/wire_format_lite.h>
47 #include <google/protobuf/io/coded_stream.h>
48 #include <google/protobuf/io/zero_copy_stream.h>
49 #include <google/protobuf/io/zero_copy_stream_impl.h>
50 #include <google/protobuf/unknown_field_set.h>
51 #include <google/protobuf/descriptor.pb.h>
52 #include <google/protobuf/io/tokenizer.h>
53 #include <google/protobuf/stubs/strutil.h>
54 #include <google/protobuf/stubs/map_util.h>
55 #include <google/protobuf/stubs/stl_util.h>
56
57 namespace google {
58 namespace protobuf {
59
60 namespace {
61
// Returns true when |str| begins with a hexadecimal prefix, i.e. a '0'
// immediately followed by 'x' or 'X'.  Nothing after the prefix is examined.
inline bool IsHexNumber(const string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char marker = str[1];
  return marker == 'x' || marker == 'X';
}
66
// Returns true when |str| begins with an octal prefix, i.e. a '0'
// immediately followed by a digit in the range '0'..'7'.  Nothing after
// those two characters is examined.
inline bool IsOctNumber(const string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char digit = str[1];
  return '0' <= digit && digit < '8';
}
71
72 } // namespace
73
DebugString() const74 string Message::DebugString() const {
75 string debug_string;
76
77 TextFormat::PrintToString(*this, &debug_string);
78
79 return debug_string;
80 }
81
ShortDebugString() const82 string Message::ShortDebugString() const {
83 string debug_string;
84
85 TextFormat::Printer printer;
86 printer.SetSingleLineMode(true);
87
88 printer.PrintToString(*this, &debug_string);
89 // Single line mode currently might have an extra space at the end.
90 if (debug_string.size() > 0 &&
91 debug_string[debug_string.size() - 1] == ' ') {
92 debug_string.resize(debug_string.size() - 1);
93 }
94
95 return debug_string;
96 }
97
Utf8DebugString() const98 string Message::Utf8DebugString() const {
99 string debug_string;
100
101 TextFormat::Printer printer;
102 printer.SetUseUtf8StringEscaping(true);
103
104 printer.PrintToString(*this, &debug_string);
105
106 return debug_string;
107 }
108
PrintDebugString() const109 void Message::PrintDebugString() const {
110 printf("%s", DebugString().c_str());
111 }
112
113
114 // ===========================================================================
115 // Implementation of the parse information tree class.
// Creates a tree with no explicit initialization; the constructor body is
// empty.
TextFormat::ParseInfoTree::ParseInfoTree() { }
117
~ParseInfoTree()118 TextFormat::ParseInfoTree::~ParseInfoTree() {
119 // Remove any nested information trees, as they are owned by this tree.
120 for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) {
121 STLDeleteElements(&(it->second));
122 }
123 }
124
RecordLocation(const FieldDescriptor * field,TextFormat::ParseLocation location)125 void TextFormat::ParseInfoTree::RecordLocation(
126 const FieldDescriptor* field,
127 TextFormat::ParseLocation location) {
128 locations_[field].push_back(location);
129 }
130
CreateNested(const FieldDescriptor * field)131 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
132 const FieldDescriptor* field) {
133 // Owned by us in the map.
134 TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree();
135 vector<TextFormat::ParseInfoTree*>* trees = &nested_[field];
136 GOOGLE_CHECK(trees);
137 trees->push_back(instance);
138 return instance;
139 }
140
CheckFieldIndex(const FieldDescriptor * field,int index)141 void CheckFieldIndex(const FieldDescriptor* field, int index) {
142 if (field == NULL) { return; }
143
144 if (field->is_repeated() && index == -1) {
145 GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
146 << "Field: " << field->name();
147 } else if (!field->is_repeated() && index != -1) {
148 GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
149 << "Field: " << field->name();
150 }
151 }
152
GetLocation(const FieldDescriptor * field,int index) const153 TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation(
154 const FieldDescriptor* field, int index) const {
155 CheckFieldIndex(field, index);
156 if (index == -1) { index = 0; }
157
158 const vector<TextFormat::ParseLocation>* locations =
159 FindOrNull(locations_, field);
160 if (locations == NULL || index >= locations->size()) {
161 return TextFormat::ParseLocation();
162 }
163
164 return (*locations)[index];
165 }
166
GetTreeForNested(const FieldDescriptor * field,int index) const167 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
168 const FieldDescriptor* field, int index) const {
169 CheckFieldIndex(field, index);
170 if (index == -1) { index = 0; }
171
172 const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field);
173 if (trees == NULL || index >= trees->size()) {
174 return NULL;
175 }
176
177 return (*trees)[index];
178 }
179
180
181 // ===========================================================================
182 // Internal class for parsing an ASCII representation of a Protocol Message.
183 // This class makes use of the Protocol Message compiler's tokenizer found
184 // in //google/protobuf/io/tokenizer.h. Note that class's Parse
185 // method is *not* thread-safe and should only be used in a single thread at
186 // a time.
187
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.  Borrowed from parser.cc (Thanks Kenton!).
//
// The macro expands to a complete if/else, so it is only usable inside
// functions that return bool, and the caller supplies the terminating ';'.
#define DO(STATEMENT) if (STATEMENT) {} else return false
192
193 class TextFormat::Parser::ParserImpl {
194 public:
195
// Determines if repeated values for non-repeated fields and
// oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
// required/optional field named "foo", or "baz: 1 qux: 2"
// where "baz" and "qux" are members of the same oneof.
// ConsumeField() consults this policy before storing a value.
enum SingularOverwritePolicy {
  ALLOW_SINGULAR_OVERWRITES = 0,  // the last value is retained
  FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
};
204
// Sets up a parser over |input_stream|.
//
// The pointer and flag arguments are stored in the corresponding members.
// The tokenizer is built on |input_stream| and reports through the internal
// tokenizer_error_collector_, which is constructed with a pointer to this
// parser.  The constructor configures the tokenizer and then advances it to
// the first token, so the parser is immediately positioned on real input.
//
// |allow_relaxed_whitespace| is not stored; it only affects the tokenizer
// configuration done here.
ParserImpl(const Descriptor* root_message_type,
           io::ZeroCopyInputStream* input_stream,
           io::ErrorCollector* error_collector,
           TextFormat::Finder* finder,
           ParseInfoTree* parse_info_tree,
           SingularOverwritePolicy singular_overwrite_policy,
           bool allow_case_insensitive_field,
           bool allow_unknown_field,
           bool allow_unknown_enum,
           bool allow_field_number,
           bool allow_relaxed_whitespace)
  : error_collector_(error_collector),
    finder_(finder),
    parse_info_tree_(parse_info_tree),
    tokenizer_error_collector_(this),
    tokenizer_(input_stream, &tokenizer_error_collector_),
    root_message_type_(root_message_type),
    singular_overwrite_policy_(singular_overwrite_policy),
    allow_case_insensitive_field_(allow_case_insensitive_field),
    allow_unknown_field_(allow_unknown_field),
    allow_unknown_enum_(allow_unknown_enum),
    allow_field_number_(allow_field_number),
    had_errors_(false) {
  // For backwards-compatibility with proto1, we need to allow the 'f' suffix
  // for floats.
  tokenizer_.set_allow_f_after_float(true);

  // '#' starts a comment.
  tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);

  // Relaxed mode: numbers need not be followed by whitespace and string
  // literals may span lines.
  if (allow_relaxed_whitespace) {
    tokenizer_.set_require_space_after_number(false);
    tokenizer_.set_allow_multiline_strings(true);
  }

  // Consume the starting token.
  tokenizer_.Next();
}
// Empty destructor body: nothing is released explicitly here.
~ParserImpl() { }
244
245 // Parses the ASCII representation specified in input and saves the
246 // information into the output pointer (a Message). Returns
247 // false if an error occurs (an error will also be logged to
248 // GOOGLE_LOG(ERROR)).
Parse(Message * output)249 bool Parse(Message* output) {
250 // Consume fields until we cannot do so anymore.
251 while (true) {
252 if (LookingAtType(io::Tokenizer::TYPE_END)) {
253 return !had_errors_;
254 }
255
256 DO(ConsumeField(output));
257 }
258 }
259
ParseField(const FieldDescriptor * field,Message * output)260 bool ParseField(const FieldDescriptor* field, Message* output) {
261 bool suc;
262 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
263 suc = ConsumeFieldMessage(output, output->GetReflection(), field);
264 } else {
265 suc = ConsumeFieldValue(output, output->GetReflection(), field);
266 }
267 return suc && LookingAtType(io::Tokenizer::TYPE_END);
268 }
269
ReportError(int line,int col,const string & message)270 void ReportError(int line, int col, const string& message) {
271 had_errors_ = true;
272 if (error_collector_ == NULL) {
273 if (line >= 0) {
274 GOOGLE_LOG(ERROR) << "Error parsing text-format "
275 << root_message_type_->full_name()
276 << ": " << (line + 1) << ":"
277 << (col + 1) << ": " << message;
278 } else {
279 GOOGLE_LOG(ERROR) << "Error parsing text-format "
280 << root_message_type_->full_name()
281 << ": " << message;
282 }
283 } else {
284 error_collector_->AddError(line, col, message);
285 }
286 }
287
ReportWarning(int line,int col,const string & message)288 void ReportWarning(int line, int col, const string& message) {
289 if (error_collector_ == NULL) {
290 if (line >= 0) {
291 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
292 << root_message_type_->full_name()
293 << ": " << (line + 1) << ":"
294 << (col + 1) << ": " << message;
295 } else {
296 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
297 << root_message_type_->full_name()
298 << ": " << message;
299 }
300 } else {
301 error_collector_->AddWarning(line, col, message);
302 }
303 }
304
305 private:
306 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
307
308 // Reports an error with the given message with information indicating
309 // the position (as derived from the current token).
ReportError(const string & message)310 void ReportError(const string& message) {
311 ReportError(tokenizer_.current().line, tokenizer_.current().column,
312 message);
313 }
314
315 // Reports a warning with the given message with information indicating
316 // the position (as derived from the current token).
ReportWarning(const string & message)317 void ReportWarning(const string& message) {
318 ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
319 message);
320 }
321
322 // Consumes the specified message with the given starting delimeter.
323 // This method checks to see that the end delimeter at the conclusion of
324 // the consumption matches the starting delimeter passed in here.
ConsumeMessage(Message * message,const string delimeter)325 bool ConsumeMessage(Message* message, const string delimeter) {
326 while (!LookingAt(">") && !LookingAt("}")) {
327 DO(ConsumeField(message));
328 }
329
330 // Confirm that we have a valid ending delimeter.
331 DO(Consume(delimeter));
332
333 return true;
334 }
335
336
// Consumes the current field (as returned by the tokenizer) on the
// passed in message.
//
// Steps, in order:
//   1. Read the field name: either a bracketed, dot-separated extension
//      name or a plain identifier (possibly a field number), and resolve it
//      to a FieldDescriptor.
//   2. If the field is unknown and unknown fields are allowed, skip its
//      value and return.
//   3. Enforce the singular-overwrite policy.
//   4. Consume the ':' separator and the value(s), including the short
//      "[a, b, c]" form for repeated fields.
//   5. Consume an optional trailing ';' or ',', warn about deprecated
//      fields and record the field's start location in the parse info tree.
//
// Returns false on the first error; errors are reported via ReportError().
bool ConsumeField(Message* message) {
  const Reflection* reflection = message->GetReflection();
  const Descriptor* descriptor = message->GetDescriptor();

  string field_name;

  const FieldDescriptor* field = NULL;
  // Position of the first token of the field, recorded in the parse info
  // tree at the end.
  int start_line = tokenizer_.current().line;
  int start_column = tokenizer_.current().column;

  if (TryConsume("[")) {
    // Extension.
    DO(ConsumeIdentifier(&field_name));
    while (TryConsume(".")) {
      string part;
      DO(ConsumeIdentifier(&part));
      field_name += ".";
      field_name += part;
    }
    DO(Consume("]"));

    // A caller-supplied finder takes precedence over the reflection lookup.
    field = (finder_ != NULL
             ? finder_->FindExtension(message, field_name)
             : reflection->FindKnownExtensionByName(field_name));

    if (field == NULL) {
      if (!allow_unknown_field_) {
        ReportError("Extension \"" + field_name + "\" is not defined or "
                    "is not an extension of \"" +
                    descriptor->full_name() + "\".");
        return false;
      } else {
        ReportWarning("Extension \"" + field_name + "\" is not defined or "
                      "is not an extension of \"" +
                      descriptor->full_name() + "\".");
      }
    }
  } else {
    DO(ConsumeIdentifier(&field_name));

    int32 field_number;
    if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
      // The "name" is numeric: look the field up by number.
      if (descriptor->IsExtensionNumber(field_number)) {
        field = reflection->FindKnownExtensionByNumber(field_number);
      } else {
        field = descriptor->FindFieldByNumber(field_number);
      }
    } else {
      field = descriptor->FindFieldByName(field_name);
      // Group names are expected to be capitalized as they appear in the
      // .proto file, which actually matches their type names, not their
      // field names.
      if (field == NULL) {
        string lower_field_name = field_name;
        LowerString(&lower_field_name);
        field = descriptor->FindFieldByName(lower_field_name);
        // If the case-insensitive match worked but the field is NOT a group,
        // discard the match: this fallback exists for group names only.
        if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
          field = NULL;
        }
      }
      // Again, special-case group names as described above.
      if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
          && field->message_type()->name() != field_name) {
        field = NULL;
      }

      if (field == NULL && allow_case_insensitive_field_) {
        string lower_field_name = field_name;
        LowerString(&lower_field_name);
        field = descriptor->FindFieldByLowercaseName(lower_field_name);
      }
    }

    if (field == NULL) {
      if (!allow_unknown_field_) {
        ReportError("Message type \"" + descriptor->full_name() +
                    "\" has no field named \"" + field_name + "\".");
        return false;
      } else {
        ReportWarning("Message type \"" + descriptor->full_name() +
                      "\" has no field named \"" + field_name + "\".");
      }
    }
  }

  // Skips unknown field.
  if (field == NULL) {
    GOOGLE_CHECK(allow_unknown_field_);
    // Try to guess the type of this field.
    // If this field is not a message, there should be a ":" between the
    // field name and the field value and also the field value should not
    // start with "{" or "<" which indicates the beginning of a message body.
    // If there is no ":" or there is a "{" or "<" after ":", this field has
    // to be a message or the input is ill-formed.
    if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
      return SkipFieldValue();
    } else {
      return SkipFieldMessage();
    }
  }

  if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
    // Fail if the field is not repeated and it has already been specified.
    if (!field->is_repeated() && reflection->HasField(*message, field)) {
      ReportError("Non-repeated field \"" + field_name +
                  "\" is specified multiple times.");
      return false;
    }
    // Fail if the field is a member of a oneof and another member has already
    // been specified.
    const OneofDescriptor* oneof = field->containing_oneof();
    if (oneof != NULL && reflection->HasOneof(*message, oneof)) {
      const FieldDescriptor* other_field =
          reflection->GetOneofFieldDescriptor(*message, oneof);
      ReportError("Field \"" + field_name + "\" is specified along with "
                  "field \"" + other_field->name() + "\", another member "
                  "of oneof \"" + oneof->name() + "\".");
      return false;
    }
  }

  // Perform special handling for embedded message types.
  if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    // ':' is optional here.
    TryConsume(":");
  } else {
    // ':' is required here.
    DO(Consume(":"));
  }

  if (field->is_repeated() && TryConsume("[")) {
    // Short repeated format, e.g.  "foo: [1, 2, 3]"
    while (true) {
      if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
        // Perform special handling for embedded message types.
        DO(ConsumeFieldMessage(message, reflection, field));
      } else {
        DO(ConsumeFieldValue(message, reflection, field));
      }
      if (TryConsume("]")) {
        break;
      }
      DO(Consume(","));
    }
  } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    DO(ConsumeFieldMessage(message, reflection, field));
  } else {
    DO(ConsumeFieldValue(message, reflection, field));
  }

  // For historical reasons, fields may optionally be separated by commas or
  // semicolons.
  TryConsume(";") || TryConsume(",");

  if (field->options().deprecated()) {
    ReportWarning("text format contains deprecated field \""
                  + field_name + "\"");
  }

  // If a parse info tree exists, add the location for the parsed
  // field.
  if (parse_info_tree_ != NULL) {
    RecordLocation(parse_info_tree_, field,
                   ParseLocation(start_line, start_column));
  }

  return true;
}
508
509 // Skips the next field including the field's name and value.
SkipField()510 bool SkipField() {
511 string field_name;
512 if (TryConsume("[")) {
513 // Extension name.
514 DO(ConsumeIdentifier(&field_name));
515 while (TryConsume(".")) {
516 string part;
517 DO(ConsumeIdentifier(&part));
518 field_name += ".";
519 field_name += part;
520 }
521 DO(Consume("]"));
522 } else {
523 DO(ConsumeIdentifier(&field_name));
524 }
525
526 // Try to guess the type of this field.
527 // If this field is not a message, there should be a ":" between the
528 // field name and the field value and also the field value should not
529 // start with "{" or "<" which indicates the begining of a message body.
530 // If there is no ":" or there is a "{" or "<" after ":", this field has
531 // to be a message or the input is ill-formed.
532 if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
533 DO(SkipFieldValue());
534 } else {
535 DO(SkipFieldMessage());
536 }
537 // For historical reasons, fields may optionally be separated by commas or
538 // semicolons.
539 TryConsume(";") || TryConsume(",");
540 return true;
541 }
542
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)543 bool ConsumeFieldMessage(Message* message,
544 const Reflection* reflection,
545 const FieldDescriptor* field) {
546
547 // If the parse information tree is not NULL, create a nested one
548 // for the nested message.
549 ParseInfoTree* parent = parse_info_tree_;
550 if (parent != NULL) {
551 parse_info_tree_ = CreateNested(parent, field);
552 }
553
554 string delimeter;
555 if (TryConsume("<")) {
556 delimeter = ">";
557 } else {
558 DO(Consume("{"));
559 delimeter = "}";
560 }
561
562 if (field->is_repeated()) {
563 DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
564 } else {
565 DO(ConsumeMessage(reflection->MutableMessage(message, field),
566 delimeter));
567 }
568
569 // Reset the parse information tree.
570 parse_info_tree_ = parent;
571 return true;
572 }
573
574 // Skips the whole body of a message including the begining delimeter and
575 // the ending delimeter.
SkipFieldMessage()576 bool SkipFieldMessage() {
577 string delimeter;
578 if (TryConsume("<")) {
579 delimeter = ">";
580 } else {
581 DO(Consume("{"));
582 delimeter = "}";
583 }
584 while (!LookingAt(">") && !LookingAt("}")) {
585 DO(SkipField());
586 }
587 DO(Consume(delimeter));
588 return true;
589 }
590
ConsumeFieldValue(Message * message,const Reflection * reflection,const FieldDescriptor * field)591 bool ConsumeFieldValue(Message* message,
592 const Reflection* reflection,
593 const FieldDescriptor* field) {
594
595 // Define an easy to use macro for setting fields. This macro checks
596 // to see if the field is repeated (in which case we need to use the Add
597 // methods or not (in which case we need to use the Set methods).
598 #define SET_FIELD(CPPTYPE, VALUE) \
599 if (field->is_repeated()) { \
600 reflection->Add##CPPTYPE(message, field, VALUE); \
601 } else { \
602 reflection->Set##CPPTYPE(message, field, VALUE); \
603 } \
604
605 switch(field->cpp_type()) {
606 case FieldDescriptor::CPPTYPE_INT32: {
607 int64 value;
608 DO(ConsumeSignedInteger(&value, kint32max));
609 SET_FIELD(Int32, static_cast<int32>(value));
610 break;
611 }
612
613 case FieldDescriptor::CPPTYPE_UINT32: {
614 uint64 value;
615 DO(ConsumeUnsignedInteger(&value, kuint32max));
616 SET_FIELD(UInt32, static_cast<uint32>(value));
617 break;
618 }
619
620 case FieldDescriptor::CPPTYPE_INT64: {
621 int64 value;
622 DO(ConsumeSignedInteger(&value, kint64max));
623 SET_FIELD(Int64, value);
624 break;
625 }
626
627 case FieldDescriptor::CPPTYPE_UINT64: {
628 uint64 value;
629 DO(ConsumeUnsignedInteger(&value, kuint64max));
630 SET_FIELD(UInt64, value);
631 break;
632 }
633
634 case FieldDescriptor::CPPTYPE_FLOAT: {
635 double value;
636 DO(ConsumeDouble(&value));
637 SET_FIELD(Float, static_cast<float>(value));
638 break;
639 }
640
641 case FieldDescriptor::CPPTYPE_DOUBLE: {
642 double value;
643 DO(ConsumeDouble(&value));
644 SET_FIELD(Double, value);
645 break;
646 }
647
648 case FieldDescriptor::CPPTYPE_STRING: {
649 string value;
650 DO(ConsumeString(&value));
651 SET_FIELD(String, value);
652 break;
653 }
654
655 case FieldDescriptor::CPPTYPE_BOOL: {
656 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
657 uint64 value;
658 DO(ConsumeUnsignedInteger(&value, 1));
659 SET_FIELD(Bool, value);
660 } else {
661 string value;
662 DO(ConsumeIdentifier(&value));
663 if (value == "true" || value == "True" || value == "t") {
664 SET_FIELD(Bool, true);
665 } else if (value == "false" || value == "False" || value == "f") {
666 SET_FIELD(Bool, false);
667 } else {
668 ReportError("Invalid value for boolean field \"" + field->name()
669 + "\". Value: \"" + value + "\".");
670 return false;
671 }
672 }
673 break;
674 }
675
676 case FieldDescriptor::CPPTYPE_ENUM: {
677 string value;
678 const EnumDescriptor* enum_type = field->enum_type();
679 const EnumValueDescriptor* enum_value = NULL;
680
681 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
682 DO(ConsumeIdentifier(&value));
683 // Find the enumeration value.
684 enum_value = enum_type->FindValueByName(value);
685
686 } else if (LookingAt("-") ||
687 LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
688 int64 int_value;
689 DO(ConsumeSignedInteger(&int_value, kint32max));
690 value = SimpleItoa(int_value); // for error reporting
691 enum_value = enum_type->FindValueByNumber(int_value);
692 } else {
693 ReportError("Expected integer or identifier.");
694 return false;
695 }
696
697 if (enum_value == NULL) {
698 if (!allow_unknown_enum_) {
699 ReportError("Unknown enumeration value of \"" + value + "\" for "
700 "field \"" + field->name() + "\".");
701 return false;
702 } else {
703 ReportWarning("Unknown enumeration value of \"" + value + "\" for "
704 "field \"" + field->name() + "\".");
705 return true;
706 }
707 }
708
709 SET_FIELD(Enum, enum_value);
710 break;
711 }
712
713 case FieldDescriptor::CPPTYPE_MESSAGE: {
714 // We should never get here. Put here instead of a default
715 // so that if new types are added, we get a nice compiler warning.
716 GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
717 break;
718 }
719 }
720 #undef SET_FIELD
721 return true;
722 }
723
SkipFieldValue()724 bool SkipFieldValue() {
725 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
726 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
727 tokenizer_.Next();
728 }
729 return true;
730 }
731 // Possible field values other than string:
732 // 12345 => TYPE_INTEGER
733 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
734 // 1.2345 => TYPE_FLOAT
735 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
736 // inf => TYPE_IDENTIFIER
737 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
738 // TYPE_INTEGER => TYPE_IDENTIFIER
739 // Divides them into two group, one with TYPE_SYMBOL
740 // and the other without:
741 // Group one:
742 // 12345 => TYPE_INTEGER
743 // 1.2345 => TYPE_FLOAT
744 // inf => TYPE_IDENTIFIER
745 // TYPE_INTEGER => TYPE_IDENTIFIER
746 // Group two:
747 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
748 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
749 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
750 // As we can see, the field value consists of an optional '-' and one of
751 // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
752 bool has_minus = TryConsume("-");
753 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
754 !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
755 !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
756 return false;
757 }
758 // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
759 // value while other combinations all generate valid values.
760 // We check if the value of this combination is valid here.
761 // TYPE_IDENTIFIER after a '-' should be one of the float values listed
762 // below:
763 // inf, inff, infinity, nan
764 if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
765 string text = tokenizer_.current().text;
766 LowerString(&text);
767 if (text != "inf" &&
768 text != "infinity" &&
769 text != "nan") {
770 ReportError("Invalid float number: " + text);
771 return false;
772 }
773 }
774 tokenizer_.Next();
775 return true;
776 }
777
778 // Returns true if the current token's text is equal to that specified.
LookingAt(const string & text)779 bool LookingAt(const string& text) {
780 return tokenizer_.current().text == text;
781 }
782
783 // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)784 bool LookingAtType(io::Tokenizer::TokenType token_type) {
785 return tokenizer_.current().type == token_type;
786 }
787
788 // Consumes an identifier and saves its value in the identifier parameter.
789 // Returns false if the token is not of type IDENTFIER.
ConsumeIdentifier(string * identifier)790 bool ConsumeIdentifier(string* identifier) {
791 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
792 *identifier = tokenizer_.current().text;
793 tokenizer_.Next();
794 return true;
795 }
796
797 // If allow_field_numer_ or allow_unknown_field_ is true, we should able
798 // to parse integer identifiers.
799 if ((allow_field_number_ || allow_unknown_field_)
800 && LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
801 *identifier = tokenizer_.current().text;
802 tokenizer_.Next();
803 return true;
804 }
805
806 ReportError("Expected identifier.");
807 return false;
808 }
809
810 // Consumes a string and saves its value in the text parameter.
811 // Returns false if the token is not of type STRING.
ConsumeString(string * text)812 bool ConsumeString(string* text) {
813 if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
814 ReportError("Expected string.");
815 return false;
816 }
817
818 text->clear();
819 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
820 io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
821
822 tokenizer_.Next();
823 }
824
825 return true;
826 }
827
828 // Consumes a uint64 and saves its value in the value parameter.
829 // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64 * value,uint64 max_value)830 bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
831 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
832 ReportError("Expected integer.");
833 return false;
834 }
835
836 if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
837 max_value, value)) {
838 ReportError("Integer out of range.");
839 return false;
840 }
841
842 tokenizer_.Next();
843 return true;
844 }
845
846 // Consumes an int64 and saves its value in the value parameter.
847 // Note that since the tokenizer does not support negative numbers,
848 // we actually may consume an additional token (for the minus sign) in this
849 // method. Returns false if the token is not an integer
850 // (signed or otherwise).
ConsumeSignedInteger(int64 * value,uint64 max_value)851 bool ConsumeSignedInteger(int64* value, uint64 max_value) {
852 bool negative = false;
853
854 if (TryConsume("-")) {
855 negative = true;
856 // Two's complement always allows one more negative integer than
857 // positive.
858 ++max_value;
859 }
860
861 uint64 unsigned_value;
862
863 DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
864
865 *value = static_cast<int64>(unsigned_value);
866
867 if (negative) {
868 *value = -*value;
869 }
870
871 return true;
872 }
873
874 // Consumes a uint64 and saves its value in the value parameter.
875 // Accepts decimal numbers only, rejects hex or oct numbers.
ConsumeUnsignedDecimalInteger(uint64 * value,uint64 max_value)876 bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) {
877 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
878 ReportError("Expected integer.");
879 return false;
880 }
881
882 const string& text = tokenizer_.current().text;
883 if (IsHexNumber(text) || IsOctNumber(text)) {
884 ReportError("Expect a decimal number.");
885 return false;
886 }
887
888 if (!io::Tokenizer::ParseInteger(text, max_value, value)) {
889 ReportError("Integer out of range.");
890 return false;
891 }
892
893 tokenizer_.Next();
894 return true;
895 }
896
897 // Consumes a double and saves its value in the value parameter.
898 // Note that since the tokenizer does not support negative numbers,
899 // we actually may consume an additional token (for the minus sign) in this
900 // method. Returns false if the token is not a double
901 // (signed or otherwise).
ConsumeDouble(double * value)902 bool ConsumeDouble(double* value) {
903 bool negative = false;
904
905 if (TryConsume("-")) {
906 negative = true;
907 }
908
909 // A double can actually be an integer, according to the tokenizer.
910 // Therefore, we must check both cases here.
911 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
912 // We have found an integer value for the double.
913 uint64 integer_value;
914 DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max));
915
916 *value = static_cast<double>(integer_value);
917 } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
918 // We have found a float value for the double.
919 *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
920
921 // Mark the current token as consumed.
922 tokenizer_.Next();
923 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
924 string text = tokenizer_.current().text;
925 LowerString(&text);
926 if (text == "inf" ||
927 text == "infinity") {
928 *value = std::numeric_limits<double>::infinity();
929 tokenizer_.Next();
930 } else if (text == "nan") {
931 *value = std::numeric_limits<double>::quiet_NaN();
932 tokenizer_.Next();
933 } else {
934 ReportError("Expected double.");
935 return false;
936 }
937 } else {
938 ReportError("Expected double.");
939 return false;
940 }
941
942 if (negative) {
943 *value = -*value;
944 }
945
946 return true;
947 }
948
949 // Consumes a token and confirms that it matches that specified in the
950 // value parameter. Returns false if the token found does not match that
951 // which was specified.
Consume(const string & value)952 bool Consume(const string& value) {
953 const string& current_value = tokenizer_.current().text;
954
955 if (current_value != value) {
956 ReportError("Expected \"" + value + "\", found \"" + current_value
957 + "\".");
958 return false;
959 }
960
961 tokenizer_.Next();
962
963 return true;
964 }
965
// Attempts to consume the supplied value. Returns false if the
// token found does not match the value specified.
TryConsume(const string & value)968 bool TryConsume(const string& value) {
969 if (tokenizer_.current().text == value) {
970 tokenizer_.Next();
971 return true;
972 } else {
973 return false;
974 }
975 }
976
977 // An internal instance of the Tokenizer's error collector, used to
978 // collect any base-level parse errors and feed them to the ParserImpl.
979 class ParserErrorCollector : public io::ErrorCollector {
980 public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)981 explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
982 parser_(parser) { }
983
~ParserErrorCollector()984 virtual ~ParserErrorCollector() { }
985
AddError(int line,int column,const string & message)986 virtual void AddError(int line, int column, const string& message) {
987 parser_->ReportError(line, column, message);
988 }
989
AddWarning(int line,int column,const string & message)990 virtual void AddWarning(int line, int column, const string& message) {
991 parser_->ReportWarning(line, column, message);
992 }
993
994 private:
995 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
996 TextFormat::Parser::ParserImpl* parser_;
997 };
998
999 io::ErrorCollector* error_collector_;
1000 TextFormat::Finder* finder_;
1001 ParseInfoTree* parse_info_tree_;
1002 ParserErrorCollector tokenizer_error_collector_;
1003 io::Tokenizer tokenizer_;
1004 const Descriptor* root_message_type_;
1005 SingularOverwritePolicy singular_overwrite_policy_;
1006 const bool allow_case_insensitive_field_;
1007 const bool allow_unknown_field_;
1008 const bool allow_unknown_enum_;
1009 const bool allow_field_number_;
1010 bool had_errors_;
1011 };
1012
1013 #undef DO
1014
1015 // ===========================================================================
1016 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1017 // from the Printer found in //google/protobuf/io/printer.h
1018 class TextFormat::Printer::TextGenerator {
1019 public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)1020 explicit TextGenerator(io::ZeroCopyOutputStream* output,
1021 int initial_indent_level)
1022 : output_(output),
1023 buffer_(NULL),
1024 buffer_size_(0),
1025 at_start_of_line_(true),
1026 failed_(false),
1027 indent_(""),
1028 initial_indent_level_(initial_indent_level) {
1029 indent_.resize(initial_indent_level_ * 2, ' ');
1030 }
1031
~TextGenerator()1032 ~TextGenerator() {
1033 // Only BackUp() if we're sure we've successfully called Next() at least
1034 // once.
1035 if (!failed_ && buffer_size_ > 0) {
1036 output_->BackUp(buffer_size_);
1037 }
1038 }
1039
1040 // Indent text by two spaces. After calling Indent(), two spaces will be
1041 // inserted at the beginning of each line of text. Indent() may be called
1042 // multiple times to produce deeper indents.
Indent()1043 void Indent() {
1044 indent_ += " ";
1045 }
1046
1047 // Reduces the current indent level by two spaces, or crashes if the indent
1048 // level is zero.
Outdent()1049 void Outdent() {
1050 if (indent_.empty() ||
1051 indent_.size() < initial_indent_level_ * 2) {
1052 GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
1053 return;
1054 }
1055
1056 indent_.resize(indent_.size() - 2);
1057 }
1058
1059 // Print text to the output stream.
Print(const string & str)1060 void Print(const string& str) {
1061 Print(str.data(), str.size());
1062 }
1063
1064 // Print text to the output stream.
Print(const char * text)1065 void Print(const char* text) {
1066 Print(text, strlen(text));
1067 }
1068
1069 // Print text to the output stream.
Print(const char * text,int size)1070 void Print(const char* text, int size) {
1071 int pos = 0; // The number of bytes we've written so far.
1072
1073 for (int i = 0; i < size; i++) {
1074 if (text[i] == '\n') {
1075 // Saw newline. If there is more text, we may need to insert an indent
1076 // here. So, write what we have so far, including the '\n'.
1077 Write(text + pos, i - pos + 1);
1078 pos = i + 1;
1079
1080 // Setting this true will cause the next Write() to insert an indent
1081 // first.
1082 at_start_of_line_ = true;
1083 }
1084 }
1085
1086 // Write the rest.
1087 Write(text + pos, size - pos);
1088 }
1089
1090 // True if any write to the underlying stream failed. (We don't just
1091 // crash in this case because this is an I/O failure, not a programming
1092 // error.)
failed() const1093 bool failed() const { return failed_; }
1094
1095 private:
1096 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
1097
Write(const char * data,int size)1098 void Write(const char* data, int size) {
1099 if (failed_) return;
1100 if (size == 0) return;
1101
1102 if (at_start_of_line_) {
1103 // Insert an indent.
1104 at_start_of_line_ = false;
1105 Write(indent_.data(), indent_.size());
1106 if (failed_) return;
1107 }
1108
1109 while (size > buffer_size_) {
1110 // Data exceeds space in the buffer. Copy what we can and request a
1111 // new buffer.
1112 memcpy(buffer_, data, buffer_size_);
1113 data += buffer_size_;
1114 size -= buffer_size_;
1115 void* void_buffer;
1116 failed_ = !output_->Next(&void_buffer, &buffer_size_);
1117 if (failed_) return;
1118 buffer_ = reinterpret_cast<char*>(void_buffer);
1119 }
1120
1121 // Buffer is big enough to receive the data; copy it.
1122 memcpy(buffer_, data, size);
1123 buffer_ += size;
1124 buffer_size_ -= size;
1125 }
1126
1127 io::ZeroCopyOutputStream* const output_;
1128 char* buffer_;
1129 int buffer_size_;
1130 bool at_start_of_line_;
1131 bool failed_;
1132
1133 string indent_;
1134 int initial_indent_level_;
1135 };
1136
1137 // ===========================================================================
1138
~Finder()1139 TextFormat::Finder::~Finder() {
1140 }
1141
Parser()1142 TextFormat::Parser::Parser()
1143 : error_collector_(NULL),
1144 finder_(NULL),
1145 parse_info_tree_(NULL),
1146 allow_partial_(false),
1147 allow_case_insensitive_field_(false),
1148 allow_unknown_field_(false),
1149 allow_unknown_enum_(false),
1150 allow_field_number_(false),
1151 allow_relaxed_whitespace_(false),
1152 allow_singular_overwrites_(false) {
1153 }
1154
~Parser()1155 TextFormat::Parser::~Parser() {}
1156
Parse(io::ZeroCopyInputStream * input,Message * output)1157 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1158 Message* output) {
1159 output->Clear();
1160
1161 ParserImpl::SingularOverwritePolicy overwrites_policy =
1162 allow_singular_overwrites_
1163 ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1164 : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1165
1166 ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1167 finder_, parse_info_tree_,
1168 overwrites_policy,
1169 allow_case_insensitive_field_, allow_unknown_field_,
1170 allow_unknown_enum_, allow_field_number_,
1171 allow_relaxed_whitespace_);
1172 return MergeUsingImpl(input, output, &parser);
1173 }
1174
ParseFromString(const string & input,Message * output)1175 bool TextFormat::Parser::ParseFromString(const string& input,
1176 Message* output) {
1177 io::ArrayInputStream input_stream(input.data(), input.size());
1178 return Parse(&input_stream, output);
1179 }
1180
Merge(io::ZeroCopyInputStream * input,Message * output)1181 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1182 Message* output) {
1183 ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1184 finder_, parse_info_tree_,
1185 ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1186 allow_case_insensitive_field_, allow_unknown_field_,
1187 allow_unknown_enum_, allow_field_number_,
1188 allow_relaxed_whitespace_);
1189 return MergeUsingImpl(input, output, &parser);
1190 }
1191
MergeFromString(const string & input,Message * output)1192 bool TextFormat::Parser::MergeFromString(const string& input,
1193 Message* output) {
1194 io::ArrayInputStream input_stream(input.data(), input.size());
1195 return Merge(&input_stream, output);
1196 }
1197
MergeUsingImpl(io::ZeroCopyInputStream *,Message * output,ParserImpl * parser_impl)1198 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1199 Message* output,
1200 ParserImpl* parser_impl) {
1201 if (!parser_impl->Parse(output)) return false;
1202 if (!allow_partial_ && !output->IsInitialized()) {
1203 vector<string> missing_fields;
1204 output->FindInitializationErrors(&missing_fields);
1205 parser_impl->ReportError(-1, 0, "Message missing required fields: " +
1206 Join(missing_fields, ", "));
1207 return false;
1208 }
1209 return true;
1210 }
1211
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * output)1212 bool TextFormat::Parser::ParseFieldValueFromString(
1213 const string& input,
1214 const FieldDescriptor* field,
1215 Message* output) {
1216 io::ArrayInputStream input_stream(input.data(), input.size());
1217 ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
1218 finder_, parse_info_tree_,
1219 ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1220 allow_case_insensitive_field_, allow_unknown_field_,
1221 allow_unknown_enum_, allow_field_number_,
1222 allow_relaxed_whitespace_);
1223 return parser.ParseField(field, output);
1224 }
1225
Parse(io::ZeroCopyInputStream * input,Message * output)1226 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1227 Message* output) {
1228 return Parser().Parse(input, output);
1229 }
1230
Merge(io::ZeroCopyInputStream * input,Message * output)1231 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1232 Message* output) {
1233 return Parser().Merge(input, output);
1234 }
1235
ParseFromString(const string & input,Message * output)1236 /* static */ bool TextFormat::ParseFromString(const string& input,
1237 Message* output) {
1238 return Parser().ParseFromString(input, output);
1239 }
1240
MergeFromString(const string & input,Message * output)1241 /* static */ bool TextFormat::MergeFromString(const string& input,
1242 Message* output) {
1243 return Parser().MergeFromString(input, output);
1244 }
1245
1246 // ===========================================================================
1247
// The default implementation for FieldValuePrinter. The base class just
// does simple formatting. That way, derived classes can decide to fall back
// to that behavior.
FieldValuePrinter()1251 TextFormat::FieldValuePrinter::FieldValuePrinter() {}
~FieldValuePrinter()1252 TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
PrintBool(bool val) const1253 string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1254 return val ? "true" : "false";
1255 }
PrintInt32(int32 val) const1256 string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const {
1257 return SimpleItoa(val);
1258 }
PrintUInt32(uint32 val) const1259 string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const {
1260 return SimpleItoa(val);
1261 }
PrintInt64(int64 val) const1262 string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const {
1263 return SimpleItoa(val);
1264 }
PrintUInt64(uint64 val) const1265 string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const {
1266 return SimpleItoa(val);
1267 }
PrintFloat(float val) const1268 string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1269 return SimpleFtoa(val);
1270 }
PrintDouble(double val) const1271 string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1272 return SimpleDtoa(val);
1273 }
string TextFormat::FieldValuePrinter::PrintString(const string& val) const {
  // The value is passed through CEscape() and wrapped in double quotes.
  const string escaped = CEscape(val);
  return StrCat("\"", escaped, "\"");
}
string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const {
  // Bytes are printed through PrintString(), so they get whatever
  // formatting that (possibly overridden) method applies.
  const string text = PrintString(val);
  return text;
}
PrintEnum(int32 val,const string & name) const1280 string TextFormat::FieldValuePrinter::PrintEnum(int32 val,
1281 const string& name) const {
1282 return name;
1283 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field) const1284 string TextFormat::FieldValuePrinter::PrintFieldName(
1285 const Message& message,
1286 const Reflection* reflection,
1287 const FieldDescriptor* field) const {
1288 if (field->is_extension()) {
1289 // We special-case MessageSet elements for compatibility with proto1.
1290 if (field->containing_type()->options().message_set_wire_format()
1291 && field->type() == FieldDescriptor::TYPE_MESSAGE
1292 && field->is_optional()
1293 && field->extension_scope() == field->message_type()) {
1294 return StrCat("[", field->message_type()->full_name(), "]");
1295 } else {
1296 return StrCat("[", field->full_name(), "]");
1297 }
1298 } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
1299 // Groups must be serialized with their original capitalization.
1300 return field->message_type()->name();
1301 } else {
1302 return field->name();
1303 }
1304 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode) const1305 string TextFormat::FieldValuePrinter::PrintMessageStart(
1306 const Message& message,
1307 int field_index,
1308 int field_count,
1309 bool single_line_mode) const {
1310 return single_line_mode ? " { " : " {\n";
1311 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode) const1312 string TextFormat::FieldValuePrinter::PrintMessageEnd(
1313 const Message& message,
1314 int field_index,
1315 int field_count,
1316 bool single_line_mode) const {
1317 return single_line_mode ? "} " : "}\n";
1318 }
1319
1320 namespace {
1321 // Our own specialization: for UTF8 escaped strings.
1322 class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter {
1323 public:
PrintString(const string & val) const1324 virtual string PrintString(const string& val) const {
1325 return StrCat("\"", strings::Utf8SafeCEscape(val), "\"");
1326 }
PrintBytes(const string & val) const1327 virtual string PrintBytes(const string& val) const {
1328 return TextFormat::FieldValuePrinter::PrintString(val);
1329 }
1330 };
1331
1332 } // namespace
1333
Printer()1334 TextFormat::Printer::Printer()
1335 : initial_indent_level_(0),
1336 single_line_mode_(false),
1337 use_field_number_(false),
1338 use_short_repeated_primitives_(false),
1339 hide_unknown_fields_(false),
1340 print_message_fields_in_index_order_(false) {
1341 SetUseUtf8StringEscaping(false);
1342 }
1343
~Printer()1344 TextFormat::Printer::~Printer() {
1345 STLDeleteValues(&custom_printers_);
1346 }
1347
SetUseUtf8StringEscaping(bool as_utf8)1348 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
1349 SetDefaultFieldValuePrinter(as_utf8
1350 ? new FieldValuePrinterUtf8Escaping()
1351 : new FieldValuePrinter());
1352 }
1353
SetDefaultFieldValuePrinter(const FieldValuePrinter * printer)1354 void TextFormat::Printer::SetDefaultFieldValuePrinter(
1355 const FieldValuePrinter* printer) {
1356 default_field_value_printer_.reset(printer);
1357 }
1358
RegisterFieldValuePrinter(const FieldDescriptor * field,const FieldValuePrinter * printer)1359 bool TextFormat::Printer::RegisterFieldValuePrinter(
1360 const FieldDescriptor* field,
1361 const FieldValuePrinter* printer) {
1362 return field != NULL
1363 && printer != NULL
1364 && custom_printers_.insert(make_pair(field, printer)).second;
1365 }
1366
PrintToString(const Message & message,string * output) const1367 bool TextFormat::Printer::PrintToString(const Message& message,
1368 string* output) const {
1369 GOOGLE_DCHECK(output) << "output specified is NULL";
1370
1371 output->clear();
1372 io::StringOutputStream output_stream(output);
1373
1374 return Print(message, &output_stream);
1375 }
1376
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output) const1377 bool TextFormat::Printer::PrintUnknownFieldsToString(
1378 const UnknownFieldSet& unknown_fields,
1379 string* output) const {
1380 GOOGLE_DCHECK(output) << "output specified is NULL";
1381
1382 output->clear();
1383 io::StringOutputStream output_stream(output);
1384 return PrintUnknownFields(unknown_fields, &output_stream);
1385 }
1386
Print(const Message & message,io::ZeroCopyOutputStream * output) const1387 bool TextFormat::Printer::Print(const Message& message,
1388 io::ZeroCopyOutputStream* output) const {
1389 TextGenerator generator(output, initial_indent_level_);
1390
1391 Print(message, generator);
1392
1393 // Output false if the generator failed internally.
1394 return !generator.failed();
1395 }
1396
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output) const1397 bool TextFormat::Printer::PrintUnknownFields(
1398 const UnknownFieldSet& unknown_fields,
1399 io::ZeroCopyOutputStream* output) const {
1400 TextGenerator generator(output, initial_indent_level_);
1401
1402 PrintUnknownFields(unknown_fields, generator);
1403
1404 // Output false if the generator failed internally.
1405 return !generator.failed();
1406 }
1407
1408 namespace {
1409 // Comparison functor for sorting FieldDescriptors by field index.
1410 struct FieldIndexSorter {
operator ()google::protobuf::__anon395507550311::FieldIndexSorter1411 bool operator()(const FieldDescriptor* left,
1412 const FieldDescriptor* right) const {
1413 return left->index() < right->index();
1414 }
1415 };
1416 } // namespace
1417
Print(const Message & message,TextGenerator & generator) const1418 void TextFormat::Printer::Print(const Message& message,
1419 TextGenerator& generator) const {
1420 const Reflection* reflection = message.GetReflection();
1421 vector<const FieldDescriptor*> fields;
1422 reflection->ListFields(message, &fields);
1423 if (print_message_fields_in_index_order_) {
1424 sort(fields.begin(), fields.end(), FieldIndexSorter());
1425 }
1426 for (int i = 0; i < fields.size(); i++) {
1427 PrintField(message, reflection, fields[i], generator);
1428 }
1429 if (!hide_unknown_fields_) {
1430 PrintUnknownFields(reflection->GetUnknownFields(message), generator);
1431 }
1432 }
1433
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output) const1434 void TextFormat::Printer::PrintFieldValueToString(
1435 const Message& message,
1436 const FieldDescriptor* field,
1437 int index,
1438 string* output) const {
1439
1440 GOOGLE_DCHECK(output) << "output specified is NULL";
1441
1442 output->clear();
1443 io::StringOutputStream output_stream(output);
1444 TextGenerator generator(&output_stream, initial_indent_level_);
1445
1446 PrintFieldValue(message, message.GetReflection(), field, index, generator);
1447 }
1448
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator) const1449 void TextFormat::Printer::PrintField(const Message& message,
1450 const Reflection* reflection,
1451 const FieldDescriptor* field,
1452 TextGenerator& generator) const {
1453 if (use_short_repeated_primitives_ &&
1454 field->is_repeated() &&
1455 field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
1456 field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
1457 PrintShortRepeatedField(message, reflection, field, generator);
1458 return;
1459 }
1460
1461 int count = 0;
1462
1463 if (field->is_repeated()) {
1464 count = reflection->FieldSize(message, field);
1465 } else if (reflection->HasField(message, field)) {
1466 count = 1;
1467 }
1468
1469 for (int j = 0; j < count; ++j) {
1470 const int field_index = field->is_repeated() ? j : -1;
1471
1472 PrintFieldName(message, reflection, field, generator);
1473
1474 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1475 const FieldValuePrinter* printer = FindWithDefault(
1476 custom_printers_, field, default_field_value_printer_.get());
1477 const Message& sub_message =
1478 field->is_repeated()
1479 ? reflection->GetRepeatedMessage(message, field, j)
1480 : reflection->GetMessage(message, field);
1481 generator.Print(
1482 printer->PrintMessageStart(
1483 sub_message, field_index, count, single_line_mode_));
1484 generator.Indent();
1485 Print(sub_message, generator);
1486 generator.Outdent();
1487 generator.Print(
1488 printer->PrintMessageEnd(
1489 sub_message, field_index, count, single_line_mode_));
1490 } else {
1491 generator.Print(": ");
1492 // Write the field value.
1493 PrintFieldValue(message, reflection, field, field_index, generator);
1494 if (single_line_mode_) {
1495 generator.Print(" ");
1496 } else {
1497 generator.Print("\n");
1498 }
1499 }
1500 }
1501 }
1502
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator) const1503 void TextFormat::Printer::PrintShortRepeatedField(
1504 const Message& message,
1505 const Reflection* reflection,
1506 const FieldDescriptor* field,
1507 TextGenerator& generator) const {
1508 // Print primitive repeated field in short form.
1509 PrintFieldName(message, reflection, field, generator);
1510
1511 int size = reflection->FieldSize(message, field);
1512 generator.Print(": [");
1513 for (int i = 0; i < size; i++) {
1514 if (i > 0) generator.Print(", ");
1515 PrintFieldValue(message, reflection, field, i, generator);
1516 }
1517 if (single_line_mode_) {
1518 generator.Print("] ");
1519 } else {
1520 generator.Print("]\n");
1521 }
1522 }
1523
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator) const1524 void TextFormat::Printer::PrintFieldName(const Message& message,
1525 const Reflection* reflection,
1526 const FieldDescriptor* field,
1527 TextGenerator& generator) const {
1528 // if use_field_number_ is true, prints field number instead
1529 // of field name.
1530 if (use_field_number_) {
1531 generator.Print(SimpleItoa(field->number()));
1532 return;
1533 }
1534
1535 const FieldValuePrinter* printer = FindWithDefault(
1536 custom_printers_, field, default_field_value_printer_.get());
1537 generator.Print(printer->PrintFieldName(message, reflection, field));
1538 }
1539
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,TextGenerator & generator) const1540 void TextFormat::Printer::PrintFieldValue(
1541 const Message& message,
1542 const Reflection* reflection,
1543 const FieldDescriptor* field,
1544 int index,
1545 TextGenerator& generator) const {
1546 GOOGLE_DCHECK(field->is_repeated() || (index == -1))
1547 << "Index must be -1 for non-repeated fields";
1548
1549 const FieldValuePrinter* printer
1550 = FindWithDefault(custom_printers_, field,
1551 default_field_value_printer_.get());
1552
1553 switch (field->cpp_type()) {
1554 #define OUTPUT_FIELD(CPPTYPE, METHOD) \
1555 case FieldDescriptor::CPPTYPE_##CPPTYPE: \
1556 generator.Print(printer->Print##METHOD(field->is_repeated() \
1557 ? reflection->GetRepeated##METHOD(message, field, index) \
1558 : reflection->Get##METHOD(message, field))); \
1559 break
1560
1561 OUTPUT_FIELD( INT32, Int32);
1562 OUTPUT_FIELD( INT64, Int64);
1563 OUTPUT_FIELD(UINT32, UInt32);
1564 OUTPUT_FIELD(UINT64, UInt64);
1565 OUTPUT_FIELD( FLOAT, Float);
1566 OUTPUT_FIELD(DOUBLE, Double);
1567 OUTPUT_FIELD( BOOL, Bool);
1568 #undef OUTPUT_FIELD
1569
1570 case FieldDescriptor::CPPTYPE_STRING: {
1571 string scratch;
1572 const string& value = field->is_repeated()
1573 ? reflection->GetRepeatedStringReference(
1574 message, field, index, &scratch)
1575 : reflection->GetStringReference(message, field, &scratch);
1576 if (field->type() == FieldDescriptor::TYPE_STRING) {
1577 generator.Print(printer->PrintString(value));
1578 } else {
1579 GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
1580 generator.Print(printer->PrintBytes(value));
1581 }
1582 break;
1583 }
1584
1585 case FieldDescriptor::CPPTYPE_ENUM: {
1586 const EnumValueDescriptor *enum_val = field->is_repeated()
1587 ? reflection->GetRepeatedEnum(message, field, index)
1588 : reflection->GetEnum(message, field);
1589 generator.Print(printer->PrintEnum(enum_val->number(), enum_val->name()));
1590 break;
1591 }
1592
1593 case FieldDescriptor::CPPTYPE_MESSAGE:
1594 Print(field->is_repeated()
1595 ? reflection->GetRepeatedMessage(message, field, index)
1596 : reflection->GetMessage(message, field),
1597 generator);
1598 break;
1599 }
1600 }
1601
Print(const Message & message,io::ZeroCopyOutputStream * output)1602 /* static */ bool TextFormat::Print(const Message& message,
1603 io::ZeroCopyOutputStream* output) {
1604 return Printer().Print(message, output);
1605 }
1606
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)1607 /* static */ bool TextFormat::PrintUnknownFields(
1608 const UnknownFieldSet& unknown_fields,
1609 io::ZeroCopyOutputStream* output) {
1610 return Printer().PrintUnknownFields(unknown_fields, output);
1611 }
1612
PrintToString(const Message & message,string * output)1613 /* static */ bool TextFormat::PrintToString(
1614 const Message& message, string* output) {
1615 return Printer().PrintToString(message, output);
1616 }
1617
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output)1618 /* static */ bool TextFormat::PrintUnknownFieldsToString(
1619 const UnknownFieldSet& unknown_fields, string* output) {
1620 return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1621 }
1622
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output)1623 /* static */ void TextFormat::PrintFieldValueToString(
1624 const Message& message,
1625 const FieldDescriptor* field,
1626 int index,
1627 string* output) {
1628 return Printer().PrintFieldValueToString(message, field, index, output);
1629 }
1630
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * message)1631 /* static */ bool TextFormat::ParseFieldValueFromString(
1632 const string& input,
1633 const FieldDescriptor* field,
1634 Message* message) {
1635 return Parser().ParseFieldValueFromString(input, field, message);
1636 }
1637
1638 // Prints an integer as hex with a fixed number of digits dependent on the
1639 // integer type.
1640 template<typename IntType>
PaddedHex(IntType value)1641 static string PaddedHex(IntType value) {
1642 string result;
1643 result.reserve(sizeof(value) * 2);
1644 for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1645 result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1646 }
1647 return result;
1648 }
1649
PrintUnknownFields(const UnknownFieldSet & unknown_fields,TextGenerator & generator) const1650 void TextFormat::Printer::PrintUnknownFields(
1651 const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
1652 for (int i = 0; i < unknown_fields.field_count(); i++) {
1653 const UnknownField& field = unknown_fields.field(i);
1654 string field_number = SimpleItoa(field.number());
1655
1656 switch (field.type()) {
1657 case UnknownField::TYPE_VARINT:
1658 generator.Print(field_number);
1659 generator.Print(": ");
1660 generator.Print(SimpleItoa(field.varint()));
1661 if (single_line_mode_) {
1662 generator.Print(" ");
1663 } else {
1664 generator.Print("\n");
1665 }
1666 break;
1667 case UnknownField::TYPE_FIXED32: {
1668 generator.Print(field_number);
1669 generator.Print(": 0x");
1670 char buffer[kFastToBufferSize];
1671 generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
1672 if (single_line_mode_) {
1673 generator.Print(" ");
1674 } else {
1675 generator.Print("\n");
1676 }
1677 break;
1678 }
1679 case UnknownField::TYPE_FIXED64: {
1680 generator.Print(field_number);
1681 generator.Print(": 0x");
1682 char buffer[kFastToBufferSize];
1683 generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
1684 if (single_line_mode_) {
1685 generator.Print(" ");
1686 } else {
1687 generator.Print("\n");
1688 }
1689 break;
1690 }
1691 case UnknownField::TYPE_LENGTH_DELIMITED: {
1692 generator.Print(field_number);
1693 const string& value = field.length_delimited();
1694 UnknownFieldSet embedded_unknown_fields;
1695 if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1696 // This field is parseable as a Message.
1697 // So it is probably an embedded message.
1698 if (single_line_mode_) {
1699 generator.Print(" { ");
1700 } else {
1701 generator.Print(" {\n");
1702 generator.Indent();
1703 }
1704 PrintUnknownFields(embedded_unknown_fields, generator);
1705 if (single_line_mode_) {
1706 generator.Print("} ");
1707 } else {
1708 generator.Outdent();
1709 generator.Print("}\n");
1710 }
1711 } else {
1712 // This field is not parseable as a Message.
1713 // So it is probably just a plain string.
1714 generator.Print(": \"");
1715 generator.Print(CEscape(value));
1716 generator.Print("\"");
1717 if (single_line_mode_) {
1718 generator.Print(" ");
1719 } else {
1720 generator.Print("\n");
1721 }
1722 }
1723 break;
1724 }
1725 case UnknownField::TYPE_GROUP:
1726 generator.Print(field_number);
1727 if (single_line_mode_) {
1728 generator.Print(" { ");
1729 } else {
1730 generator.Print(" {\n");
1731 generator.Indent();
1732 }
1733 PrintUnknownFields(field.group(), generator);
1734 if (single_line_mode_) {
1735 generator.Print("} ");
1736 } else {
1737 generator.Outdent();
1738 generator.Print("}\n");
1739 }
1740 break;
1741 }
1742 }
1743 }
1744
1745 } // namespace protobuf
1746 } // namespace google
1747