1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Utilities for printing and parsing protocol messages in a human-readable,
36 // text-based format.
37 
38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40 
41 #include <map>
42 #include <memory>
43 #ifndef _SHARED_PTR_H
44 #include <google/protobuf/stubs/shared_ptr.h>
45 #endif
46 #include <string>
47 #include <vector>
48 
49 #include <google/protobuf/stubs/common.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/message.h>
52 
53 namespace google {
54 namespace protobuf {
55 
56 namespace io {
57   class ErrorCollector;      // tokenizer.h
58 }
59 
60 // This class implements protocol buffer text format.  Printing and parsing
61 // protocol messages in text format is useful for debugging and human editing
62 // of messages.
63 //
64 // This class is really a namespace that contains only static methods.
65 class LIBPROTOBUF_EXPORT TextFormat {
66  public:
67   // Outputs a textual representation of the given message to the given
68   // output stream.
69   static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
70 
71   // Print the fields in an UnknownFieldSet.  They are printed by tag number
72   // only.  Embedded messages are heuristically identified by attempting to
73   // parse them.
74   static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
75                                  io::ZeroCopyOutputStream* output);
76 
77   // Like Print(), but outputs directly to a string.
78   static bool PrintToString(const Message& message, string* output);
79 
80   // Like PrintUnknownFields(), but outputs directly to a string.
81   static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
82                                          string* output);
83 
84   // Outputs a textual representation of the value of the field supplied on
85   // the message supplied. For non-repeated fields, an index of -1 must
86   // be supplied. Note that this method will print the default value for a
87   // field if it is not set.
88   static void PrintFieldValueToString(const Message& message,
89                                       const FieldDescriptor* field,
90                                       int index,
91                                       string* output);
92 
93   // The default printer that converts scalar values from fields into
94   // their string representation.
95   // You can derive from this FieldValuePrinter if you want to have
96   // fields to be printed in a different way and register it at the
97   // Printer.
98   class LIBPROTOBUF_EXPORT FieldValuePrinter {
99    public:
100     FieldValuePrinter();
101     virtual ~FieldValuePrinter();
102     virtual string PrintBool(bool val) const;
103     virtual string PrintInt32(int32 val) const;
104     virtual string PrintUInt32(uint32 val) const;
105     virtual string PrintInt64(int64 val) const;
106     virtual string PrintUInt64(uint64 val) const;
107     virtual string PrintFloat(float val) const;
108     virtual string PrintDouble(double val) const;
109     virtual string PrintString(const string& val) const;
110     virtual string PrintBytes(const string& val) const;
111     virtual string PrintEnum(int32 val, const string& name) const;
112     virtual string PrintFieldName(const Message& message,
113                                   const Reflection* reflection,
114                                   const FieldDescriptor* field) const;
115     virtual string PrintMessageStart(const Message& message,
116                                      int field_index,
117                                      int field_count,
118                                      bool single_line_mode) const;
119     virtual string PrintMessageEnd(const Message& message,
120                                    int field_index,
121                                    int field_count,
122                                    bool single_line_mode) const;
123 
124    private:
125     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
126   };
127 
128   // Class for those users which require more fine-grained control over how
129   // a protobuffer message is printed out.
130   class LIBPROTOBUF_EXPORT Printer {
131    public:
132     Printer();
133     ~Printer();
134 
135     // Like TextFormat::Print
136     bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
137     // Like TextFormat::PrintUnknownFields
138     bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
139                             io::ZeroCopyOutputStream* output) const;
140     // Like TextFormat::PrintToString
141     bool PrintToString(const Message& message, string* output) const;
142     // Like TextFormat::PrintUnknownFieldsToString
143     bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
144                                     string* output) const;
145     // Like TextFormat::PrintFieldValueToString
146     void PrintFieldValueToString(const Message& message,
147                                  const FieldDescriptor* field,
148                                  int index,
149                                  string* output) const;
150 
151     // Adjust the initial indent level of all output.  Each indent level is
152     // equal to two spaces.
SetInitialIndentLevel(int indent_level)153     void SetInitialIndentLevel(int indent_level) {
154       initial_indent_level_ = indent_level;
155     }
156 
157     // If printing in single line mode, then the entire message will be output
158     // on a single line with no line breaks.
SetSingleLineMode(bool single_line_mode)159     void SetSingleLineMode(bool single_line_mode) {
160       single_line_mode_ = single_line_mode;
161     }
162 
IsInSingleLineMode()163     bool IsInSingleLineMode() {
164       return single_line_mode_;
165     }
166 
167     // If use_field_number is true, uses field number instead of field name.
SetUseFieldNumber(bool use_field_number)168     void SetUseFieldNumber(bool use_field_number) {
169       use_field_number_ = use_field_number;
170     }
171 
172     // Set true to print repeated primitives in a format like:
173     //   field_name: [1, 2, 3, 4]
174     // instead of printing each value on its own line.  Short format applies
175     // only to primitive values -- i.e. everything except strings and
176     // sub-messages/groups.
SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives)177     void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
178       use_short_repeated_primitives_ = use_short_repeated_primitives;
179     }
180 
181     // Set true to output UTF-8 instead of ASCII.  The only difference
182     // is that bytes >= 0x80 in string fields will not be escaped,
183     // because they are assumed to be part of UTF-8 multi-byte
184     // sequences. This will change the default FieldValuePrinter.
185     void SetUseUtf8StringEscaping(bool as_utf8);
186 
187     // Set the default FieldValuePrinter that is used for all fields that
188     // don't have a field-specific printer registered.
189     // Takes ownership of the printer.
190     void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
191 
192     // Sets whether we want to hide unknown fields or not.
193     // Usually unknown fields are printed in a generic way that includes the
194     // tag number of the field instead of field name. However, sometimes it
195     // is useful to be able to print the message without unknown fields (e.g.
196     // for the python protobuf version to maintain consistency between its pure
197     // python and c++ implementations).
SetHideUnknownFields(bool hide)198     void SetHideUnknownFields(bool hide) {
199       hide_unknown_fields_ = hide;
200     }
201 
202     // If print_message_fields_in_index_order is true, print fields of a proto
203     // message using the order defined in source code instead of the field
204     // number. By default, use the field number order.
SetPrintMessageFieldsInIndexOrder(bool print_message_fields_in_index_order)205     void SetPrintMessageFieldsInIndexOrder(
206         bool print_message_fields_in_index_order) {
207       print_message_fields_in_index_order_ =
208           print_message_fields_in_index_order;
209     }
210 
211     // If expand==true, expand google.protobuf.Any payloads. The output
212     // will be of form
213     //    [type_url] { <value_printed_in_text> }
214     //
215     // If expand==false, print Any using the default printer. The output will
216     // look like
217     //    type_url: "<type_url>"  value: "serialized_content"
SetExpandAny(bool expand)218     void SetExpandAny(bool expand) {
219       expand_any_ = expand;
220     }
221 
222     // If non-zero, we truncate all string fields that are  longer than this
223     // threshold.  This is useful when the proto message has very long strings,
224     // e.g., dump of encoded image file.
225     //
226     // NOTE(hfgong):  Setting a non-zero value breaks round-trip safe
227     // property of TextFormat::Printer.  That is, from the printed message, we
228     // cannot fully recover the original string field any more.
SetTruncateStringFieldLongerThan(const int64 truncate_string_field_longer_than)229     void SetTruncateStringFieldLongerThan(
230         const int64 truncate_string_field_longer_than) {
231       truncate_string_field_longer_than_ = truncate_string_field_longer_than;
232     }
233 
234     // Register a custom field-specific FieldValuePrinter for fields
235     // with a particular FieldDescriptor.
236     // Returns "true" if the registration succeeded, or "false", if there is
237     // already a printer for that FieldDescriptor.
238     // Takes ownership of the printer on successful registration.
239     bool RegisterFieldValuePrinter(const FieldDescriptor* field,
240                                    const FieldValuePrinter* printer);
241 
242    private:
243     // Forward declaration of an internal class used to print the text
244     // output to the OutputStream (see text_format.cc for implementation).
245     class TextGenerator;
246 
247     // Internal Print method, used for writing to the OutputStream via
248     // the TextGenerator class.
249     void Print(const Message& message,
250                TextGenerator& generator) const;
251 
252     // Print a single field.
253     void PrintField(const Message& message,
254                     const Reflection* reflection,
255                     const FieldDescriptor* field,
256                     TextGenerator& generator) const;
257 
258     // Print a repeated primitive field in short form.
259     void PrintShortRepeatedField(const Message& message,
260                                  const Reflection* reflection,
261                                  const FieldDescriptor* field,
262                                  TextGenerator& generator) const;
263 
264     // Print the name of a field -- i.e. everything that comes before the
265     // ':' for a single name/value pair.
266     void PrintFieldName(const Message& message,
267                         const Reflection* reflection,
268                         const FieldDescriptor* field,
269                         TextGenerator& generator) const;
270 
271     // Outputs a textual representation of the value of the field supplied on
272     // the message supplied or the default value if not set.
273     void PrintFieldValue(const Message& message,
274                          const Reflection* reflection,
275                          const FieldDescriptor* field,
276                          int index,
277                          TextGenerator& generator) const;
278 
279     // Print the fields in an UnknownFieldSet.  They are printed by tag number
280     // only.  Embedded messages are heuristically identified by attempting to
281     // parse them.
282     void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
283                             TextGenerator& generator) const;
284 
285     bool PrintAny(const Message& message, TextGenerator& generator) const;
286 
287     int initial_indent_level_;
288 
289     bool single_line_mode_;
290 
291     bool use_field_number_;
292 
293     bool use_short_repeated_primitives_;
294 
295     bool hide_unknown_fields_;
296 
297     bool print_message_fields_in_index_order_;
298 
299     bool expand_any_;
300 
301     int64 truncate_string_field_longer_than_;
302 
303     google::protobuf::scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
304     typedef map<const FieldDescriptor*,
305                 const FieldValuePrinter*> CustomPrinterMap;
306     CustomPrinterMap custom_printers_;
307   };
308 
309   // Parses a text-format protocol message from the given input stream to
310   // the given message object. This function parses the human-readable format
311   // written by Print(). Returns true on success. The message is cleared first,
312   // even if the function fails -- See Merge() to avoid this behavior.
313   //
314   // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
315   //
316   // One use for this function is parsing handwritten strings in test code.
317   // Another use is to parse the output from google::protobuf::Message::DebugString()
318   // (or ShortDebugString()), because these functions output using
319   // google::protobuf::TextFormat::Print().
320   //
321   // If you would like to read a protocol buffer serialized in the
322   // (non-human-readable) binary wire format, see
323   // google::protobuf::MessageLite::ParseFromString().
324   static bool Parse(io::ZeroCopyInputStream* input, Message* output);
325   // Like Parse(), but reads directly from a string.
326   static bool ParseFromString(const string& input, Message* output);
327 
328   // Like Parse(), but the data is merged into the given message, as if
329   // using Message::MergeFrom().
330   static bool Merge(io::ZeroCopyInputStream* input, Message* output);
331   // Like Merge(), but reads directly from a string.
332   static bool MergeFromString(const string& input, Message* output);
333 
334   // Parse the given text as a single field value and store it into the
335   // given field of the given message. If the field is a repeated field,
336   // the new value will be added to the end
337   static bool ParseFieldValueFromString(const string& input,
338                                         const FieldDescriptor* field,
339                                         Message* message);
340 
341   // Interface that TextFormat::Parser can use to find extensions.
342   // This class may be extended in the future to find more information
343   // like fields, etc.
344   class LIBPROTOBUF_EXPORT Finder {
345    public:
346     virtual ~Finder();
347 
348     // Try to find an extension of *message by fully-qualified field
349     // name.  Returns NULL if no extension is known for this name or number.
350     virtual const FieldDescriptor* FindExtension(
351         Message* message,
352         const string& name) const = 0;
353   };
354 
355   // A location in the parsed text.
356   struct ParseLocation {
357     int line;
358     int column;
359 
ParseLocationParseLocation360     ParseLocation() : line(-1), column(-1) {}
ParseLocationParseLocation361     ParseLocation(int line_param, int column_param)
362         : line(line_param), column(column_param) {}
363   };
364 
365   // Data structure which is populated with the locations of each field
366   // value parsed from the text.
367   class LIBPROTOBUF_EXPORT ParseInfoTree {
368    public:
369     ParseInfoTree();
370     ~ParseInfoTree();
371 
372     // Returns the parse location for index-th value of the field in the parsed
373     // text. If none exists, returns a location with line = -1. Index should be
374     // -1 for not-repeated fields.
375     ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
376 
377     // Returns the parse info tree for the given field, which must be a message
378     // type. The nested information tree is owned by the root tree and will be
379     // deleted when it is deleted.
380     ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
381                                     int index) const;
382 
383    private:
384     // Allow the text format parser to record information into the tree.
385     friend class TextFormat;
386 
387     // Records the starting location of a single value for a field.
388     void RecordLocation(const FieldDescriptor* field, ParseLocation location);
389 
390     // Create and records a nested tree for a nested message field.
391     ParseInfoTree* CreateNested(const FieldDescriptor* field);
392 
393     // Defines the map from the index-th field descriptor to its parse location.
394     typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
395 
396     // Defines the map from the index-th field descriptor to the nested parse
397     // info tree.
398     typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
399 
400     LocationMap locations_;
401     NestedMap nested_;
402 
403     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
404   };
405 
406   // For more control over parsing, use this class.
407   class LIBPROTOBUF_EXPORT Parser {
408    public:
409     Parser();
410     ~Parser();
411 
412     // Like TextFormat::Parse().
413     bool Parse(io::ZeroCopyInputStream* input, Message* output);
414     // Like TextFormat::ParseFromString().
415     bool ParseFromString(const string& input, Message* output);
416     // Like TextFormat::Merge().
417     bool Merge(io::ZeroCopyInputStream* input, Message* output);
418     // Like TextFormat::MergeFromString().
419     bool MergeFromString(const string& input, Message* output);
420 
421     // Set where to report parse errors.  If NULL (the default), errors will
422     // be printed to stderr.
RecordErrorsTo(io::ErrorCollector * error_collector)423     void RecordErrorsTo(io::ErrorCollector* error_collector) {
424       error_collector_ = error_collector;
425     }
426 
427     // Set how parser finds extensions.  If NULL (the default), the
428     // parser will use the standard Reflection object associated with
429     // the message being parsed.
SetFinder(Finder * finder)430     void SetFinder(Finder* finder) {
431       finder_ = finder;
432     }
433 
434     // Sets where location information about the parse will be written. If NULL
435     // (the default), then no location will be written.
WriteLocationsTo(ParseInfoTree * tree)436     void WriteLocationsTo(ParseInfoTree* tree) {
437       parse_info_tree_ = tree;
438     }
439 
440     // Normally parsing fails if, after parsing, output->IsInitialized()
441     // returns false.  Call AllowPartialMessage(true) to skip this check.
AllowPartialMessage(bool allow)442     void AllowPartialMessage(bool allow) {
443       allow_partial_ = allow;
444     }
445 
446     // Allow field names to be matched case-insensitively.
447     // This is not advisable if there are fields that only differ in case, or
448     // if you want to enforce writing in the canonical form.
449     // This is 'false' by default.
AllowCaseInsensitiveField(bool allow)450     void AllowCaseInsensitiveField(bool allow) {
451       allow_case_insensitive_field_ = allow;
452     }
453 
454     // Like TextFormat::ParseFieldValueFromString
455     bool ParseFieldValueFromString(const string& input,
456                                    const FieldDescriptor* field,
457                                    Message* output);
458 
459 
AllowFieldNumber(bool allow)460     void AllowFieldNumber(bool allow) {
461       allow_field_number_ = allow;
462     }
463 
464    private:
465     // Forward declaration of an internal class used to parse text
466     // representations (see text_format.cc for implementation).
467     class ParserImpl;
468 
469     // Like TextFormat::Merge().  The provided implementation is used
470     // to do the parsing.
471     bool MergeUsingImpl(io::ZeroCopyInputStream* input,
472                         Message* output,
473                         ParserImpl* parser_impl);
474 
475     io::ErrorCollector* error_collector_;
476     Finder* finder_;
477     ParseInfoTree* parse_info_tree_;
478     bool allow_partial_;
479     bool allow_case_insensitive_field_;
480     bool allow_unknown_field_;
481     bool allow_unknown_enum_;
482     bool allow_field_number_;
483     bool allow_relaxed_whitespace_;
484     bool allow_singular_overwrites_;
485   };
486 
487 
488  private:
489   // Hack: ParseInfoTree declares TextFormat as a friend which should extend
490   // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
491   // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
492   // helpers for ParserImpl to call methods of ParseInfoTree.
493   static inline void RecordLocation(ParseInfoTree* info_tree,
494                                     const FieldDescriptor* field,
495                                     ParseLocation location);
496   static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
497                                             const FieldDescriptor* field);
498 
499   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
500 };
501 
RecordLocation(ParseInfoTree * info_tree,const FieldDescriptor * field,ParseLocation location)502 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
503                                        const FieldDescriptor* field,
504                                        ParseLocation location) {
505   info_tree->RecordLocation(field, location);
506 }
507 
508 
CreateNested(ParseInfoTree * info_tree,const FieldDescriptor * field)509 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
510     ParseInfoTree* info_tree, const FieldDescriptor* field) {
511   return info_tree->CreateNested(field);
512 }
513 
514 }  // namespace protobuf
515 
516 }  // namespace google
517 #endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
518