1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //         atenasio@google.com (Chris Atenasio) (ZigZag transform)
33 //  Based on original Protocol Buffers design by
34 //  Sanjay Ghemawat, Jeff Dean, and others.
35 //
36 // This header is logically internal, but is made public because it is used
37 // from protocol-compiler-generated code, which may reside in other components.
38 
39 #ifndef GOOGLE_PROTOBUF_WIRE_FORMAT_H__
40 #define GOOGLE_PROTOBUF_WIRE_FORMAT_H__
41 
42 #include <string>
43 #include <google/protobuf/stubs/common.h>
44 #include <google/protobuf/descriptor.pb.h>
45 #include <google/protobuf/descriptor.h>
46 #include <google/protobuf/message.h>
47 #include <google/protobuf/wire_format_lite.h>
48 
// UTF-8 validation of string fields is compiled in only when NDEBUG is not
// defined (i.e. in debug builds).
#if !defined(NDEBUG)
#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
#endif
53 
54 namespace google {
// Forward declarations of types that this header only uses through pointers
// and references.  The trailing comment on each names the header to look in.
namespace protobuf {
namespace io {
class CodedInputStream;   // coded_stream.h
class CodedOutputStream;  // coded_stream.h
}  // namespace io
class UnknownFieldSet;  // unknown_field_set.h
}  // namespace protobuf
62 
63 namespace protobuf {
64 namespace internal {
65 
66 // This class is for internal use by the protocol buffer library and by
67 // protocol-complier-generated message classes.  It must not be called
68 // directly by clients.
69 //
70 // This class contains code for implementing the binary protocol buffer
71 // wire format via reflection.  The WireFormatLite class implements the
72 // non-reflection based routines.
73 //
74 // This class is really a namespace that contains only static methods
75 class LIBPROTOBUF_EXPORT WireFormat {
76  public:
77 
78   // Given a field return its WireType
79   static inline WireFormatLite::WireType WireTypeForField(
80       const FieldDescriptor* field);
81 
82   // Given a FieldDescriptor::Type return its WireType
83   static inline WireFormatLite::WireType WireTypeForFieldType(
84       FieldDescriptor::Type type);
85 
86   // Compute the byte size of a tag.  For groups, this includes both the start
87   // and end tags.
88   static inline int TagSize(int field_number, FieldDescriptor::Type type);
89 
90   // These procedures can be used to implement the methods of Message which
91   // handle parsing and serialization of the protocol buffer wire format
92   // using only the Reflection interface.  When you ask the protocol
93   // compiler to optimize for code size rather than speed, it will implement
94   // those methods in terms of these procedures.  Of course, these are much
95   // slower than the specialized implementations which the protocol compiler
96   // generates when told to optimize for speed.
97 
98   // Read a message in protocol buffer wire format.
99   //
100   // This procedure reads either to the end of the input stream or through
101   // a WIRETYPE_END_GROUP tag ending the message, whichever comes first.
102   // It returns false if the input is invalid.
103   //
104   // Required fields are NOT checked by this method.  You must call
105   // IsInitialized() on the resulting message yourself.
106   static bool ParseAndMergePartial(io::CodedInputStream* input,
107                                    Message* message);
108 
109   // Serialize a message in protocol buffer wire format.
110   //
111   // Any embedded messages within the message must have their correct sizes
112   // cached.  However, the top-level message need not; its size is passed as
113   // a parameter to this procedure.
114   //
115   // These return false iff the underlying stream returns a write error.
116   static void SerializeWithCachedSizes(
117       const Message& message,
118       int size, io::CodedOutputStream* output);
119 
120   // Implements Message::ByteSize() via reflection.  WARNING:  The result
121   // of this method is *not* cached anywhere.  However, all embedded messages
122   // will have their ByteSize() methods called, so their sizes will be cached.
123   // Therefore, calling this method is sufficient to allow you to call
124   // WireFormat::SerializeWithCachedSizes() on the same object.
125   static int ByteSize(const Message& message);
126 
127   // -----------------------------------------------------------------
128   // Helpers for dealing with unknown fields
129 
130   // Skips a field value of the given WireType.  The input should start
131   // positioned immediately after the tag.  If unknown_fields is non-NULL,
132   // the contents of the field will be added to it.
133   static bool SkipField(io::CodedInputStream* input, uint32 tag,
134                         UnknownFieldSet* unknown_fields);
135 
136   // Reads and ignores a message from the input.  If unknown_fields is non-NULL,
137   // the contents will be added to it.
138   static bool SkipMessage(io::CodedInputStream* input,
139                           UnknownFieldSet* unknown_fields);
140 
141   // Read a packed enum field. If the is_valid function is not NULL, values for
142   // which is_valid(value) returns false are appended to unknown_fields_stream.
143   static bool ReadPackedEnumPreserveUnknowns(io::CodedInputStream* input,
144                                              uint32 field_number,
145                                              bool (*is_valid)(int),
146                                              UnknownFieldSet* unknown_fields,
147                                              RepeatedField<int>* values);
148 
149   // Write the contents of an UnknownFieldSet to the output.
150   static void SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
151                                      io::CodedOutputStream* output);
152   // Same as above, except writing directly to the provided buffer.
153   // Requires that the buffer have sufficient capacity for
154   // ComputeUnknownFieldsSize(unknown_fields).
155   //
156   // Returns a pointer past the last written byte.
157   static uint8* SerializeUnknownFieldsToArray(
158       const UnknownFieldSet& unknown_fields,
159       uint8* target);
160 
161   // Same thing except for messages that have the message_set_wire_format
162   // option.
163   static void SerializeUnknownMessageSetItems(
164       const UnknownFieldSet& unknown_fields,
165       io::CodedOutputStream* output);
166   // Same as above, except writing directly to the provided buffer.
167   // Requires that the buffer have sufficient capacity for
168   // ComputeUnknownMessageSetItemsSize(unknown_fields).
169   //
170   // Returns a pointer past the last written byte.
171   static uint8* SerializeUnknownMessageSetItemsToArray(
172       const UnknownFieldSet& unknown_fields,
173       uint8* target);
174 
175   // Compute the size of the UnknownFieldSet on the wire.
176   static int ComputeUnknownFieldsSize(const UnknownFieldSet& unknown_fields);
177 
178   // Same thing except for messages that have the message_set_wire_format
179   // option.
180   static int ComputeUnknownMessageSetItemsSize(
181       const UnknownFieldSet& unknown_fields);
182 
183 
184   // Helper functions for encoding and decoding tags.  (Inlined below and in
185   // _inl.h)
186   //
187   // This is different from MakeTag(field->number(), field->type()) in the case
188   // of packed repeated fields.
189   static uint32 MakeTag(const FieldDescriptor* field);
190 
191   // Parse a single field.  The input should start out positioned immediately
192   // after the tag.
193   static bool ParseAndMergeField(
194       uint32 tag,
195       const FieldDescriptor* field,        // May be NULL for unknown
196       Message* message,
197       io::CodedInputStream* input);
198 
199   // Serialize a single field.
200   static void SerializeFieldWithCachedSizes(
201       const FieldDescriptor* field,        // Cannot be NULL
202       const Message& message,
203       io::CodedOutputStream* output);
204 
205   // Compute size of a single field.  If the field is a message type, this
206   // will call ByteSize() for the embedded message, insuring that it caches
207   // its size.
208   static int FieldByteSize(
209       const FieldDescriptor* field,        // Cannot be NULL
210       const Message& message);
211 
212   // Parse/serialize a MessageSet::Item group.  Used with messages that use
213   // opion message_set_wire_format = true.
214   static bool ParseAndMergeMessageSetItem(
215       io::CodedInputStream* input,
216       Message* message);
217   static void SerializeMessageSetItemWithCachedSizes(
218       const FieldDescriptor* field,
219       const Message& message,
220       io::CodedOutputStream* output);
221   static int MessageSetItemByteSize(
222       const FieldDescriptor* field,
223       const Message& message);
224 
225   // Computes the byte size of a field, excluding tags. For packed fields, it
226   // only includes the size of the raw data, and not the size of the total
227   // length, but for other length-delimited types, the size of the length is
228   // included.
229   static int FieldDataOnlyByteSize(
230       const FieldDescriptor* field,        // Cannot be NULL
231       const Message& message);
232 
233   enum Operation {
234     PARSE = 0,
235     SERIALIZE = 1,
236   };
237 
238   // Verifies that a string field is valid UTF8, logging an error if not.
239   // This function will not be called by newly generated protobuf code
240   // but remains present to support existing code.
241   static void VerifyUTF8String(const char* data, int size, Operation op);
242   // The NamedField variant takes a field name in order to produce an
243   // informative error message if verification fails.
244   static void VerifyUTF8StringNamedField(const char* data,
245                                          int size,
246                                          Operation op,
247                                          const char* field_name);
248 
249  private:
250   // Skip a MessageSet field.
251   static bool SkipMessageSetField(io::CodedInputStream* input,
252                                   uint32 field_number,
253                                   UnknownFieldSet* unknown_fields);
254 
255   // Parse a MessageSet field.
256   static bool ParseAndMergeMessageSetField(uint32 field_number,
257                                            const FieldDescriptor* field,
258                                            Message* message,
259                                            io::CodedInputStream* input);
260 
261   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
262 };
263 
264 // Subclass of FieldSkipper which saves skipped fields to an UnknownFieldSet.
265 class LIBPROTOBUF_EXPORT UnknownFieldSetFieldSkipper : public FieldSkipper {
266  public:
UnknownFieldSetFieldSkipper(UnknownFieldSet * unknown_fields)267   UnknownFieldSetFieldSkipper(UnknownFieldSet* unknown_fields)
268       : unknown_fields_(unknown_fields) {}
~UnknownFieldSetFieldSkipper()269   virtual ~UnknownFieldSetFieldSkipper() {}
270 
271   // implements FieldSkipper -----------------------------------------
272   virtual bool SkipField(io::CodedInputStream* input, uint32 tag);
273   virtual bool SkipMessage(io::CodedInputStream* input);
274   virtual void SkipUnknownEnum(int field_number, int value);
275 
276  protected:
277   UnknownFieldSet* unknown_fields_;
278 };
279 
280 // inline methods ====================================================
281 
WireTypeForField(const FieldDescriptor * field)282 inline WireFormatLite::WireType WireFormat::WireTypeForField(
283     const FieldDescriptor* field) {
284   if (field->is_packed()) {
285     return WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
286   } else {
287     return WireTypeForFieldType(field->type());
288   }
289 }
290 
WireTypeForFieldType(FieldDescriptor::Type type)291 inline WireFormatLite::WireType WireFormat::WireTypeForFieldType(
292     FieldDescriptor::Type type) {
293   // Some compilers don't like enum -> enum casts, so we implicit_cast to
294   // int first.
295   return WireFormatLite::WireTypeForFieldType(
296       static_cast<WireFormatLite::FieldType>(
297         implicit_cast<int>(type)));
298 }
299 
MakeTag(const FieldDescriptor * field)300 inline uint32 WireFormat::MakeTag(const FieldDescriptor* field) {
301   return WireFormatLite::MakeTag(field->number(), WireTypeForField(field));
302 }
303 
TagSize(int field_number,FieldDescriptor::Type type)304 inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
305   // Some compilers don't like enum -> enum casts, so we implicit_cast to
306   // int first.
307   return WireFormatLite::TagSize(field_number,
308       static_cast<WireFormatLite::FieldType>(
309         implicit_cast<int>(type)));
310 }
311 
VerifyUTF8String(const char * data,int size,WireFormat::Operation op)312 inline void WireFormat::VerifyUTF8String(const char* data, int size,
313     WireFormat::Operation op) {
314 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
315   WireFormatLite::VerifyUtf8String(
316       data, size, static_cast<WireFormatLite::Operation>(op), NULL);
317 #else
318   // Avoid the compiler warning about unused variables.
319   (void)data; (void)size; (void)op;
320 #endif
321 }
322 
VerifyUTF8StringNamedField(const char * data,int size,WireFormat::Operation op,const char * field_name)323 inline void WireFormat::VerifyUTF8StringNamedField(
324     const char* data, int size, WireFormat::Operation op,
325     const char* field_name) {
326 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
327   WireFormatLite::VerifyUtf8String(
328       data, size, static_cast<WireFormatLite::Operation>(op), field_name);
329 #endif
330 }
331 
332 
333 }  // namespace internal
334 }  // namespace protobuf
335 
336 }  // namespace google
337 #endif  // GOOGLE_PROTOBUF_WIRE_FORMAT_H__
338