1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // atenasio@google.com (Chris Atenasio) (ZigZag transform)
33 // Based on original Protocol Buffers design by
34 // Sanjay Ghemawat, Jeff Dean, and others.
35 //
36 // This header is logically internal, but is made public because it is used
37 // from protocol-compiler-generated code, which may reside in other components.
38
39 #ifndef GOOGLE_PROTOBUF_WIRE_FORMAT_H__
40 #define GOOGLE_PROTOBUF_WIRE_FORMAT_H__
41
42 #include <string>
43 #include <google/protobuf/stubs/common.h>
44 #include <google/protobuf/descriptor.pb.h>
45 #include <google/protobuf/descriptor.h>
46 #include <google/protobuf/message.h>
47 #include <google/protobuf/wire_format_lite.h>
48
49 // Do UTF-8 validation on string type in Debug build only
50 #ifndef NDEBUG
51 #define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
52 #endif
53
54 namespace google {
55 namespace protobuf {
56 namespace io {
57 class CodedInputStream; // coded_stream.h
58 class CodedOutputStream; // coded_stream.h
59 }
60 class UnknownFieldSet; // unknown_field_set.h
61 }
62
63 namespace protobuf {
64 namespace internal {
65
66 // This class is for internal use by the protocol buffer library and by
67 // protocol-complier-generated message classes. It must not be called
68 // directly by clients.
69 //
70 // This class contains code for implementing the binary protocol buffer
71 // wire format via reflection. The WireFormatLite class implements the
72 // non-reflection based routines.
73 //
74 // This class is really a namespace that contains only static methods
75 class LIBPROTOBUF_EXPORT WireFormat {
76 public:
77
78 // Given a field return its WireType
79 static inline WireFormatLite::WireType WireTypeForField(
80 const FieldDescriptor* field);
81
82 // Given a FieldDescriptor::Type return its WireType
83 static inline WireFormatLite::WireType WireTypeForFieldType(
84 FieldDescriptor::Type type);
85
86 // Compute the byte size of a tag. For groups, this includes both the start
87 // and end tags.
88 static inline int TagSize(int field_number, FieldDescriptor::Type type);
89
90 // These procedures can be used to implement the methods of Message which
91 // handle parsing and serialization of the protocol buffer wire format
92 // using only the Reflection interface. When you ask the protocol
93 // compiler to optimize for code size rather than speed, it will implement
94 // those methods in terms of these procedures. Of course, these are much
95 // slower than the specialized implementations which the protocol compiler
96 // generates when told to optimize for speed.
97
98 // Read a message in protocol buffer wire format.
99 //
100 // This procedure reads either to the end of the input stream or through
101 // a WIRETYPE_END_GROUP tag ending the message, whichever comes first.
102 // It returns false if the input is invalid.
103 //
104 // Required fields are NOT checked by this method. You must call
105 // IsInitialized() on the resulting message yourself.
106 static bool ParseAndMergePartial(io::CodedInputStream* input,
107 Message* message);
108
109 // Serialize a message in protocol buffer wire format.
110 //
111 // Any embedded messages within the message must have their correct sizes
112 // cached. However, the top-level message need not; its size is passed as
113 // a parameter to this procedure.
114 //
115 // These return false iff the underlying stream returns a write error.
116 static void SerializeWithCachedSizes(
117 const Message& message,
118 int size, io::CodedOutputStream* output);
119
120 // Implements Message::ByteSize() via reflection. WARNING: The result
121 // of this method is *not* cached anywhere. However, all embedded messages
122 // will have their ByteSize() methods called, so their sizes will be cached.
123 // Therefore, calling this method is sufficient to allow you to call
124 // WireFormat::SerializeWithCachedSizes() on the same object.
125 static int ByteSize(const Message& message);
126
127 // -----------------------------------------------------------------
128 // Helpers for dealing with unknown fields
129
130 // Skips a field value of the given WireType. The input should start
131 // positioned immediately after the tag. If unknown_fields is non-NULL,
132 // the contents of the field will be added to it.
133 static bool SkipField(io::CodedInputStream* input, uint32 tag,
134 UnknownFieldSet* unknown_fields);
135
136 // Reads and ignores a message from the input. If unknown_fields is non-NULL,
137 // the contents will be added to it.
138 static bool SkipMessage(io::CodedInputStream* input,
139 UnknownFieldSet* unknown_fields);
140
141 // Write the contents of an UnknownFieldSet to the output.
142 static void SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
143 io::CodedOutputStream* output);
144 // Same as above, except writing directly to the provided buffer.
145 // Requires that the buffer have sufficient capacity for
146 // ComputeUnknownFieldsSize(unknown_fields).
147 //
148 // Returns a pointer past the last written byte.
149 static uint8* SerializeUnknownFieldsToArray(
150 const UnknownFieldSet& unknown_fields,
151 uint8* target);
152
153 // Same thing except for messages that have the message_set_wire_format
154 // option.
155 static void SerializeUnknownMessageSetItems(
156 const UnknownFieldSet& unknown_fields,
157 io::CodedOutputStream* output);
158 // Same as above, except writing directly to the provided buffer.
159 // Requires that the buffer have sufficient capacity for
160 // ComputeUnknownMessageSetItemsSize(unknown_fields).
161 //
162 // Returns a pointer past the last written byte.
163 static uint8* SerializeUnknownMessageSetItemsToArray(
164 const UnknownFieldSet& unknown_fields,
165 uint8* target);
166
167 // Compute the size of the UnknownFieldSet on the wire.
168 static int ComputeUnknownFieldsSize(const UnknownFieldSet& unknown_fields);
169
170 // Same thing except for messages that have the message_set_wire_format
171 // option.
172 static int ComputeUnknownMessageSetItemsSize(
173 const UnknownFieldSet& unknown_fields);
174
175
176 // Helper functions for encoding and decoding tags. (Inlined below and in
177 // _inl.h)
178 //
179 // This is different from MakeTag(field->number(), field->type()) in the case
180 // of packed repeated fields.
181 static uint32 MakeTag(const FieldDescriptor* field);
182
183 // Parse a single field. The input should start out positioned immediately
184 // after the tag.
185 static bool ParseAndMergeField(
186 uint32 tag,
187 const FieldDescriptor* field, // May be NULL for unknown
188 Message* message,
189 io::CodedInputStream* input);
190
191 // Serialize a single field.
192 static void SerializeFieldWithCachedSizes(
193 const FieldDescriptor* field, // Cannot be NULL
194 const Message& message,
195 io::CodedOutputStream* output);
196
197 // Compute size of a single field. If the field is a message type, this
198 // will call ByteSize() for the embedded message, insuring that it caches
199 // its size.
200 static int FieldByteSize(
201 const FieldDescriptor* field, // Cannot be NULL
202 const Message& message);
203
204 // Parse/serialize a MessageSet::Item group. Used with messages that use
205 // opion message_set_wire_format = true.
206 static bool ParseAndMergeMessageSetItem(
207 io::CodedInputStream* input,
208 Message* message);
209 static void SerializeMessageSetItemWithCachedSizes(
210 const FieldDescriptor* field,
211 const Message& message,
212 io::CodedOutputStream* output);
213 static int MessageSetItemByteSize(
214 const FieldDescriptor* field,
215 const Message& message);
216
217 // Computes the byte size of a field, excluding tags. For packed fields, it
218 // only includes the size of the raw data, and not the size of the total
219 // length, but for other length-delimited types, the size of the length is
220 // included.
221 static int FieldDataOnlyByteSize(
222 const FieldDescriptor* field, // Cannot be NULL
223 const Message& message);
224
225 enum Operation {
226 PARSE,
227 SERIALIZE,
228 };
229
230 // Verifies that a string field is valid UTF8, logging an error if not.
231 // This function will not be called by newly generated protobuf code
232 // but remains present to support existing code.
233 static void VerifyUTF8String(const char* data, int size, Operation op);
234 // The NamedField variant takes a field name in order to produce an
235 // informative error message if verification fails.
236 static void VerifyUTF8StringNamedField(const char* data,
237 int size,
238 Operation op,
239 const char* field_name);
240
241 private:
242 // Verifies that a string field is valid UTF8, logging an error if not.
243 static void VerifyUTF8StringFallback(
244 const char* data,
245 int size,
246 Operation op,
247 const char* field_name);
248
249 // Skip a MessageSet field.
250 static bool SkipMessageSetField(io::CodedInputStream* input,
251 uint32 field_number,
252 UnknownFieldSet* unknown_fields);
253
254 // Parse a MessageSet field.
255 static bool ParseAndMergeMessageSetField(uint32 field_number,
256 const FieldDescriptor* field,
257 Message* message,
258 io::CodedInputStream* input);
259
260
261
262 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
263 };
264
265 // Subclass of FieldSkipper which saves skipped fields to an UnknownFieldSet.
266 class LIBPROTOBUF_EXPORT UnknownFieldSetFieldSkipper : public FieldSkipper {
267 public:
UnknownFieldSetFieldSkipper(UnknownFieldSet * unknown_fields)268 UnknownFieldSetFieldSkipper(UnknownFieldSet* unknown_fields)
269 : unknown_fields_(unknown_fields) {}
~UnknownFieldSetFieldSkipper()270 virtual ~UnknownFieldSetFieldSkipper() {}
271
272 // implements FieldSkipper -----------------------------------------
273 virtual bool SkipField(io::CodedInputStream* input, uint32 tag);
274 virtual bool SkipMessage(io::CodedInputStream* input);
275 virtual void SkipUnknownEnum(int field_number, int value);
276
277 protected:
278 UnknownFieldSet* unknown_fields_;
279 };
280
281 // inline methods ====================================================
282
WireTypeForField(const FieldDescriptor * field)283 inline WireFormatLite::WireType WireFormat::WireTypeForField(
284 const FieldDescriptor* field) {
285 if (field->options().packed()) {
286 return WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
287 } else {
288 return WireTypeForFieldType(field->type());
289 }
290 }
291
WireTypeForFieldType(FieldDescriptor::Type type)292 inline WireFormatLite::WireType WireFormat::WireTypeForFieldType(
293 FieldDescriptor::Type type) {
294 // Some compilers don't like enum -> enum casts, so we implicit_cast to
295 // int first.
296 return WireFormatLite::WireTypeForFieldType(
297 static_cast<WireFormatLite::FieldType>(
298 implicit_cast<int>(type)));
299 }
300
MakeTag(const FieldDescriptor * field)301 inline uint32 WireFormat::MakeTag(const FieldDescriptor* field) {
302 return WireFormatLite::MakeTag(field->number(), WireTypeForField(field));
303 }
304
TagSize(int field_number,FieldDescriptor::Type type)305 inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
306 // Some compilers don't like enum -> enum casts, so we implicit_cast to
307 // int first.
308 return WireFormatLite::TagSize(field_number,
309 static_cast<WireFormatLite::FieldType>(
310 implicit_cast<int>(type)));
311 }
312
VerifyUTF8String(const char * data,int size,WireFormat::Operation op)313 inline void WireFormat::VerifyUTF8String(const char* data, int size,
314 WireFormat::Operation op) {
315 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
316 WireFormat::VerifyUTF8StringFallback(data, size, op, NULL);
317 #else
318 // Avoid the compiler warning about unsued variables.
319 (void)data; (void)size; (void)op;
320 #endif
321 }
322
VerifyUTF8StringNamedField(const char * data,int size,WireFormat::Operation op,const char * field_name)323 inline void WireFormat::VerifyUTF8StringNamedField(
324 const char* data, int size, WireFormat::Operation op,
325 const char* field_name) {
326 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
327 WireFormat::VerifyUTF8StringFallback(data, size, op, field_name);
328 #endif
329 }
330
331
332 } // namespace internal
333 } // namespace protobuf
334
335 } // namespace google
336 #endif // GOOGLE_PROTOBUF_WIRE_FORMAT_H__
337