1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Utilities for printing and parsing protocol messages in a human-readable,
36 // text-based format.
37 
38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40 
41 #include <map>
42 #include <memory>
43 #include <string>
44 #include <vector>
45 
46 #include <google/protobuf/stubs/common.h>
47 #include <google/protobuf/descriptor.h>
48 #include <google/protobuf/message.h>
49 
50 namespace google {
51 namespace protobuf {
52 
// Forward declarations of the io types that this header only ever names
// through pointers.  Declaring all three here keeps the header from silently
// depending on another header to introduce the stream types.
namespace io {
  class ErrorCollector;        // tokenizer.h
  class ZeroCopyInputStream;   // zero_copy_stream.h
  class ZeroCopyOutputStream;  // zero_copy_stream.h
}
56 
57 // This class implements protocol buffer text format.  Printing and parsing
58 // protocol messages in text format is useful for debugging and human editing
59 // of messages.
60 //
61 // This class is really a namespace that contains only static methods.
62 class LIBPROTOBUF_EXPORT TextFormat {
63  public:
64   // Outputs a textual representation of the given message to the given
65   // output stream.
66   static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
67 
68   // Print the fields in an UnknownFieldSet.  They are printed by tag number
69   // only.  Embedded messages are heuristically identified by attempting to
70   // parse them.
71   static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
72                                  io::ZeroCopyOutputStream* output);
73 
74   // Like Print(), but outputs directly to a string.
75   static bool PrintToString(const Message& message, string* output);
76 
77   // Like PrintUnknownFields(), but outputs directly to a string.
78   static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
79                                          string* output);
80 
81   // Outputs a textual representation of the value of the field supplied on
82   // the message supplied. For non-repeated fields, an index of -1 must
83   // be supplied. Note that this method will print the default value for a
84   // field if it is not set.
85   static void PrintFieldValueToString(const Message& message,
86                                       const FieldDescriptor* field,
87                                       int index,
88                                       string* output);
89 
90   // The default printer that converts scalar values from fields into
91   // their string representation.
92   // You can derive from this FieldValuePrinter if you want to have
93   // fields to be printed in a different way and register it at the
94   // Printer.
95   class LIBPROTOBUF_EXPORT FieldValuePrinter {
96    public:
97     FieldValuePrinter();
98     virtual ~FieldValuePrinter();
99     virtual string PrintBool(bool val) const;
100     virtual string PrintInt32(int32 val) const;
101     virtual string PrintUInt32(uint32 val) const;
102     virtual string PrintInt64(int64 val) const;
103     virtual string PrintUInt64(uint64 val) const;
104     virtual string PrintFloat(float val) const;
105     virtual string PrintDouble(double val) const;
106     virtual string PrintString(const string& val) const;
107     virtual string PrintBytes(const string& val) const;
108     virtual string PrintEnum(int32 val, const string& name) const;
109     virtual string PrintFieldName(const Message& message,
110                                   const Reflection* reflection,
111                                   const FieldDescriptor* field) const;
112     virtual string PrintMessageStart(const Message& message,
113                                      int field_index,
114                                      int field_count,
115                                      bool single_line_mode) const;
116     virtual string PrintMessageEnd(const Message& message,
117                                    int field_index,
118                                    int field_count,
119                                    bool single_line_mode) const;
120 
121    private:
122     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
123   };
124 
125   // Class for those users which require more fine-grained control over how
126   // a protobuffer message is printed out.
127   class LIBPROTOBUF_EXPORT Printer {
128    public:
129     Printer();
130     ~Printer();
131 
132     // Like TextFormat::Print
133     bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
134     // Like TextFormat::PrintUnknownFields
135     bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
136                             io::ZeroCopyOutputStream* output) const;
137     // Like TextFormat::PrintToString
138     bool PrintToString(const Message& message, string* output) const;
139     // Like TextFormat::PrintUnknownFieldsToString
140     bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
141                                     string* output) const;
142     // Like TextFormat::PrintFieldValueToString
143     void PrintFieldValueToString(const Message& message,
144                                  const FieldDescriptor* field,
145                                  int index,
146                                  string* output) const;
147 
148     // Adjust the initial indent level of all output.  Each indent level is
149     // equal to two spaces.
SetInitialIndentLevel(int indent_level)150     void SetInitialIndentLevel(int indent_level) {
151       initial_indent_level_ = indent_level;
152     }
153 
154     // If printing in single line mode, then the entire message will be output
155     // on a single line with no line breaks.
SetSingleLineMode(bool single_line_mode)156     void SetSingleLineMode(bool single_line_mode) {
157       single_line_mode_ = single_line_mode;
158     }
159 
IsInSingleLineMode()160     bool IsInSingleLineMode() {
161       return single_line_mode_;
162     }
163 
164     // If use_field_number is true, uses field number instead of field name.
SetUseFieldNumber(bool use_field_number)165     void SetUseFieldNumber(bool use_field_number) {
166       use_field_number_ = use_field_number;
167     }
168 
169     // Set true to print repeated primitives in a format like:
170     //   field_name: [1, 2, 3, 4]
171     // instead of printing each value on its own line.  Short format applies
172     // only to primitive values -- i.e. everything except strings and
173     // sub-messages/groups.
SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives)174     void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
175       use_short_repeated_primitives_ = use_short_repeated_primitives;
176     }
177 
178     // Set true to output UTF-8 instead of ASCII.  The only difference
179     // is that bytes >= 0x80 in string fields will not be escaped,
180     // because they are assumed to be part of UTF-8 multi-byte
181     // sequences. This will change the default FieldValuePrinter.
182     void SetUseUtf8StringEscaping(bool as_utf8);
183 
184     // Set the default FieldValuePrinter that is used for all fields that
185     // don't have a field-specific printer registered.
186     // Takes ownership of the printer.
187     void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
188 
189     // Sets whether we want to hide unknown fields or not.
190     // Usually unknown fields are printed in a generic way that includes the
191     // tag number of the field instead of field name. However, sometimes it
192     // is useful to be able to print the message without unknown fields (e.g.
193     // for the python protobuf version to maintain consistency between its pure
194     // python and c++ implementations).
SetHideUnknownFields(bool hide)195     void SetHideUnknownFields(bool hide) {
196       hide_unknown_fields_ = hide;
197     }
198 
199     // If print_message_fields_in_index_order is true, print fields of a proto
200     // message using the order defined in source code instead of the field
201     // number. By default, use the field number order.
SetPrintMessageFieldsInIndexOrder(bool print_message_fields_in_index_order)202     void SetPrintMessageFieldsInIndexOrder(
203         bool print_message_fields_in_index_order) {
204       print_message_fields_in_index_order_ =
205           print_message_fields_in_index_order;
206     }
207 
208     // Register a custom field-specific FieldValuePrinter for fields
209     // with a particular FieldDescriptor.
210     // Returns "true" if the registration succeeded, or "false", if there is
211     // already a printer for that FieldDescriptor.
212     // Takes ownership of the printer on successful registration.
213     bool RegisterFieldValuePrinter(const FieldDescriptor* field,
214                                    const FieldValuePrinter* printer);
215 
216    private:
217     // Forward declaration of an internal class used to print the text
218     // output to the OutputStream (see text_format.cc for implementation).
219     class TextGenerator;
220 
221     // Internal Print method, used for writing to the OutputStream via
222     // the TextGenerator class.
223     void Print(const Message& message,
224                TextGenerator& generator) const;
225 
226     // Print a single field.
227     void PrintField(const Message& message,
228                     const Reflection* reflection,
229                     const FieldDescriptor* field,
230                     TextGenerator& generator) const;
231 
232     // Print a repeated primitive field in short form.
233     void PrintShortRepeatedField(const Message& message,
234                                  const Reflection* reflection,
235                                  const FieldDescriptor* field,
236                                  TextGenerator& generator) const;
237 
238     // Print the name of a field -- i.e. everything that comes before the
239     // ':' for a single name/value pair.
240     void PrintFieldName(const Message& message,
241                         const Reflection* reflection,
242                         const FieldDescriptor* field,
243                         TextGenerator& generator) const;
244 
245     // Outputs a textual representation of the value of the field supplied on
246     // the message supplied or the default value if not set.
247     void PrintFieldValue(const Message& message,
248                          const Reflection* reflection,
249                          const FieldDescriptor* field,
250                          int index,
251                          TextGenerator& generator) const;
252 
253     // Print the fields in an UnknownFieldSet.  They are printed by tag number
254     // only.  Embedded messages are heuristically identified by attempting to
255     // parse them.
256     void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
257                             TextGenerator& generator) const;
258 
259     int initial_indent_level_;
260 
261     bool single_line_mode_;
262 
263     bool use_field_number_;
264 
265     bool use_short_repeated_primitives_;
266 
267     bool hide_unknown_fields_;
268 
269     bool print_message_fields_in_index_order_;
270 
271     scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
272     typedef map<const FieldDescriptor*,
273                 const FieldValuePrinter*> CustomPrinterMap;
274     CustomPrinterMap custom_printers_;
275   };
276 
277   // Parses a text-format protocol message from the given input stream to
278   // the given message object.  This function parses the format written
279   // by Print().
280   static bool Parse(io::ZeroCopyInputStream* input, Message* output);
281   // Like Parse(), but reads directly from a string.
282   static bool ParseFromString(const string& input, Message* output);
283 
284   // Like Parse(), but the data is merged into the given message, as if
285   // using Message::MergeFrom().
286   static bool Merge(io::ZeroCopyInputStream* input, Message* output);
287   // Like Merge(), but reads directly from a string.
288   static bool MergeFromString(const string& input, Message* output);
289 
290   // Parse the given text as a single field value and store it into the
291   // given field of the given message. If the field is a repeated field,
292   // the new value will be added to the end
293   static bool ParseFieldValueFromString(const string& input,
294                                         const FieldDescriptor* field,
295                                         Message* message);
296 
297   // Interface that TextFormat::Parser can use to find extensions.
298   // This class may be extended in the future to find more information
299   // like fields, etc.
300   class LIBPROTOBUF_EXPORT Finder {
301    public:
302     virtual ~Finder();
303 
304     // Try to find an extension of *message by fully-qualified field
305     // name.  Returns NULL if no extension is known for this name or number.
306     virtual const FieldDescriptor* FindExtension(
307         Message* message,
308         const string& name) const = 0;
309   };
310 
311   // A location in the parsed text.
312   struct ParseLocation {
313     int line;
314     int column;
315 
ParseLocationParseLocation316     ParseLocation() : line(-1), column(-1) {}
ParseLocationParseLocation317     ParseLocation(int line_param, int column_param)
318         : line(line_param), column(column_param) {}
319   };
320 
321   // Data structure which is populated with the locations of each field
322   // value parsed from the text.
323   class LIBPROTOBUF_EXPORT ParseInfoTree {
324    public:
325     ParseInfoTree();
326     ~ParseInfoTree();
327 
328     // Returns the parse location for index-th value of the field in the parsed
329     // text. If none exists, returns a location with line = -1. Index should be
330     // -1 for not-repeated fields.
331     ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
332 
333     // Returns the parse info tree for the given field, which must be a message
334     // type. The nested information tree is owned by the root tree and will be
335     // deleted when it is deleted.
336     ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
337                                     int index) const;
338 
339    private:
340     // Allow the text format parser to record information into the tree.
341     friend class TextFormat;
342 
343     // Records the starting location of a single value for a field.
344     void RecordLocation(const FieldDescriptor* field, ParseLocation location);
345 
346     // Create and records a nested tree for a nested message field.
347     ParseInfoTree* CreateNested(const FieldDescriptor* field);
348 
349     // Defines the map from the index-th field descriptor to its parse location.
350     typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
351 
352     // Defines the map from the index-th field descriptor to the nested parse
353     // info tree.
354     typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
355 
356     LocationMap locations_;
357     NestedMap nested_;
358 
359     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
360   };
361 
362   // For more control over parsing, use this class.
363   class LIBPROTOBUF_EXPORT Parser {
364    public:
365     Parser();
366     ~Parser();
367 
368     // Like TextFormat::Parse().
369     bool Parse(io::ZeroCopyInputStream* input, Message* output);
370     // Like TextFormat::ParseFromString().
371     bool ParseFromString(const string& input, Message* output);
372     // Like TextFormat::Merge().
373     bool Merge(io::ZeroCopyInputStream* input, Message* output);
374     // Like TextFormat::MergeFromString().
375     bool MergeFromString(const string& input, Message* output);
376 
377     // Set where to report parse errors.  If NULL (the default), errors will
378     // be printed to stderr.
RecordErrorsTo(io::ErrorCollector * error_collector)379     void RecordErrorsTo(io::ErrorCollector* error_collector) {
380       error_collector_ = error_collector;
381     }
382 
383     // Set how parser finds extensions.  If NULL (the default), the
384     // parser will use the standard Reflection object associated with
385     // the message being parsed.
SetFinder(Finder * finder)386     void SetFinder(Finder* finder) {
387       finder_ = finder;
388     }
389 
390     // Sets where location information about the parse will be written. If NULL
391     // (the default), then no location will be written.
WriteLocationsTo(ParseInfoTree * tree)392     void WriteLocationsTo(ParseInfoTree* tree) {
393       parse_info_tree_ = tree;
394     }
395 
396     // Normally parsing fails if, after parsing, output->IsInitialized()
397     // returns false.  Call AllowPartialMessage(true) to skip this check.
AllowPartialMessage(bool allow)398     void AllowPartialMessage(bool allow) {
399       allow_partial_ = allow;
400     }
401 
402     // Allow field names to be matched case-insensitively.
403     // This is not advisable if there are fields that only differ in case, or
404     // if you want to enforce writing in the canonical form.
405     // This is 'false' by default.
AllowCaseInsensitiveField(bool allow)406     void AllowCaseInsensitiveField(bool allow) {
407       allow_case_insensitive_field_ = allow;
408     }
409 
410     // Like TextFormat::ParseFieldValueFromString
411     bool ParseFieldValueFromString(const string& input,
412                                    const FieldDescriptor* field,
413                                    Message* output);
414 
415 
AllowFieldNumber(bool allow)416     void AllowFieldNumber(bool allow) {
417       allow_field_number_ = allow;
418     }
419 
420    private:
421     // Forward declaration of an internal class used to parse text
422     // representations (see text_format.cc for implementation).
423     class ParserImpl;
424 
425     // Like TextFormat::Merge().  The provided implementation is used
426     // to do the parsing.
427     bool MergeUsingImpl(io::ZeroCopyInputStream* input,
428                         Message* output,
429                         ParserImpl* parser_impl);
430 
431     io::ErrorCollector* error_collector_;
432     Finder* finder_;
433     ParseInfoTree* parse_info_tree_;
434     bool allow_partial_;
435     bool allow_case_insensitive_field_;
436     bool allow_unknown_field_;
437     bool allow_unknown_enum_;
438     bool allow_field_number_;
439     bool allow_relaxed_whitespace_;
440     bool allow_singular_overwrites_;
441   };
442 
443 
444  private:
445   // Hack: ParseInfoTree declares TextFormat as a friend which should extend
446   // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
447   // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
448   // helpers for ParserImpl to call methods of ParseInfoTree.
449   static inline void RecordLocation(ParseInfoTree* info_tree,
450                                     const FieldDescriptor* field,
451                                     ParseLocation location);
452   static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
453                                             const FieldDescriptor* field);
454 
455   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
456 };
457 
RecordLocation(ParseInfoTree * info_tree,const FieldDescriptor * field,ParseLocation location)458 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
459                                        const FieldDescriptor* field,
460                                        ParseLocation location) {
461   info_tree->RecordLocation(field, location);
462 }
463 
464 
CreateNested(ParseInfoTree * info_tree,const FieldDescriptor * field)465 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
466     ParseInfoTree* info_tree, const FieldDescriptor* field) {
467   return info_tree->CreateNested(field);
468 }
469 
470 }  // namespace protobuf
471 
472 }  // namespace google
473 #endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
474