1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file contains the CodedInputStream and CodedOutputStream classes,
36 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
37 // and allow you to read or write individual pieces of data in various
38 // formats.  In particular, these implement the varint encoding for
39 // integers, a simple variable-length encoding in which smaller numbers
40 // take fewer bytes.
41 //
42 // Typically these classes will only be used internally by the protocol
43 // buffer library in order to encode and decode protocol buffers.  Clients
44 // of the library only need to know about these classes if they wish to write
45 // custom message parsing or serialization procedures.
46 //
47 // CodedOutputStream example:
48 //   // Write some data to "myfile".  First we write a 4-byte "magic number"
49 //   // to identify the file type, then write a length-delimited string.  The
50 //   // string is composed of a varint giving the length followed by the raw
51 //   // bytes.
52 //   int fd = open("myfile", O_CREAT | O_WRONLY);
53 //   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
54 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
55 //
56 //   int magic_number = 1234;
57 //   char text[] = "Hello world!";
58 //   coded_output->WriteLittleEndian32(magic_number);
59 //   coded_output->WriteVarint32(strlen(text));
60 //   coded_output->WriteRaw(text, strlen(text));
61 //
62 //   delete coded_output;
63 //   delete raw_output;
64 //   close(fd);
65 //
66 // CodedInputStream example:
67 //   // Read a file created by the above code.
68 //   int fd = open("myfile", O_RDONLY);
69 //   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
70 //   CodedInputStream* coded_input = new CodedInputStream(raw_input);
71 //
72 //   coded_input->ReadLittleEndian32(&magic_number);
73 //   if (magic_number != 1234) {
74 //     cerr << "File not in expected format." << endl;
75 //     return;
76 //   }
77 //
78 //   uint32 size;
79 //   coded_input->ReadVarint32(&size);
80 //
81 //   char* text = new char[size + 1];
82 //   coded_input->ReadRaw(text, size);
83 //   text[size] = '\0';
84 //
85 //   delete coded_input;
86 //   delete raw_input;
87 //   close(fd);
88 //
89 //   cout << "Text is: " << text << endl;
90 //   delete [] text;
91 //
92 // For those who are interested, varint encoding is defined as follows:
93 //
94 // The encoding operates on unsigned integers of up to 64 bits in length.
95 // Each byte of the encoded value has the format:
96 // * bits 0-6: Seven bits of the number being encoded.
97 // * bit 7: Zero if this is the last byte in the encoding (in which
98 //   case all remaining bits of the number are zero) or 1 if
99 //   more bytes follow.
100 // The first byte contains the least-significant 7 bits of the number, the
101 // second byte (if present) contains the next-least-significant 7 bits,
102 // and so on.  So, the binary number 1011000101011 would be encoded in two
103 // bytes as "10101011 00101100".
104 //
105 // In theory, varint could be used to encode integers of any length.
106 // However, for practicality we set a limit at 64 bits.  The maximum encoded
107 // length of a number is thus 10 bytes.
108 
109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111 
112 #include <assert.h>
113 #include <string>
114 #include <utility>
115 #ifdef _MSC_VER
116   // Assume Windows targets are always little-endian (tests may opt out below).
117   #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
118     #define PROTOBUF_LITTLE_ENDIAN 1
119   #endif
120   #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
121     // If MSVC has "/RTCc" set, it will complain about truncating casts at
122     // runtime.  This file contains some intentional truncating casts, so the
123     #pragma runtime_checks("c", off)
124   #endif
125 #else
126   #include <sys/param.h>   // for __BYTE_ORDER
127   #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
128          (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
129       !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
130     #define PROTOBUF_LITTLE_ENDIAN 1
131   #endif
132 #endif
133 #include <google/protobuf/stubs/common.h>
134 
135 namespace google {
136 
137 namespace protobuf {
138 
139 class DescriptorPool;   // used by CodedInputStream::SetExtensionRegistry()
140 class MessageFactory;   // used by CodedInputStream::SetExtensionRegistry()
141 
142 namespace io {
143 
144 // Defined in this file (forward-declared here).
145 class CodedInputStream;
146 class CodedOutputStream;
147 
148 // Defined in other files; forward-declared here.
149 class ZeroCopyInputStream;           // zero_copy_stream.h
150 class ZeroCopyOutputStream;          // zero_copy_stream.h
151 
152 // Class which reads and decodes binary data which is composed of varint-
153 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
154 // Most users will not need to deal with CodedInputStream.
155 //
156 // Most methods of CodedInputStream that return a bool return false if an
157 // underlying I/O error occurs or if the data is malformed.  Once such a
158 // failure occurs, the CodedInputStream is broken and is no longer useful.
159 class LIBPROTOBUF_EXPORT CodedInputStream {
160  public:
161   // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
162   explicit CodedInputStream(ZeroCopyInputStream* input);
163 
164   // Create a CodedInputStream that reads from the given flat array.  This is
165   // faster than using an ArrayInputStream.  PushLimit(size) is implied by
166   // this constructor.
167   explicit CodedInputStream(const uint8* buffer, int size);
168 
169   // Destroy the CodedInputStream and position the underlying
170   // ZeroCopyInputStream at the first unread byte.  If an error occurred while
171   // reading (causing a method to return false), then the exact position of
172   // the input stream may be anywhere between the last value that was read
173   // successfully and the stream's byte limit.
174   ~CodedInputStream();
175 
176   // Return true if this CodedInputStream reads from a flat array instead of
177   // a ZeroCopyInputStream.
178   inline bool IsFlat() const;
179 
180   // Skips a number of bytes.  Returns false if an underlying read error
181   // occurs.
182   bool Skip(int count);
183 
184   // Sets *data to point directly at the unread part of the CodedInputStream's
185   // underlying buffer, and *size to the size of that buffer, but does not
186   // advance the stream's current position.  This will always either produce
187   // a non-empty buffer or return false.  If the caller consumes any of
188   // this data, it should then call Skip() to skip over the consumed bytes.
189   // This may be useful for implementing external fast parsing routines for
190   // types of data not covered by the CodedInputStream interface.
191   bool GetDirectBufferPointer(const void** data, int* size);
192 
193   // Like GetDirectBufferPointer, but this method is inlined, and does not
194   // attempt to Refresh() if the buffer is currently empty.
195   GOOGLE_ATTRIBUTE_ALWAYS_INLINE void GetDirectBufferPointerInline(const void** data,
196                                                             int* size);
197 
198   // Read raw bytes, copying them into the given buffer.
199   bool ReadRaw(void* buffer, int size);
200 
201   // Like the above, with inlined optimizations. This should only be used
202   // by the protobuf implementation.
203   GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadRawInline(void* buffer, int size);
204 
205   // Like ReadRaw, but reads into a string.
206   //
207   // Implementation Note:  ReadString() grows the string gradually as it
208   // reads in the data, rather than allocating the entire requested size
209   // upfront.  This prevents denial-of-service attacks in which a client
210   // could claim that a string is going to be MAX_INT bytes long in order to
211   // crash the server because it can't allocate this much space at once.
212   bool ReadString(string* buffer, int size);
213   // Like the above, with inlined optimizations. This should only be used
214   // by the protobuf implementation.
215   GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadStringInline(string* buffer,
216                                                         int size);
217 
218 
219   // Read a 32-bit little-endian integer.
220   bool ReadLittleEndian32(uint32* value);
221   // Read a 64-bit little-endian integer.
222   bool ReadLittleEndian64(uint64* value);
223 
224   // These methods read from an externally provided buffer. The caller is
225   // responsible for ensuring that the buffer holds enough bytes to read.
226   // Read a 32-bit little-endian integer.
227   static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
228                                                    uint32* value);
229   // Read a 64-bit little-endian integer.
230   static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
231                                                    uint64* value);
232 
233   // Read an unsigned integer with Varint encoding, truncating to 32 bits.
234   // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
235   // it to uint32, but may be more efficient.
236   bool ReadVarint32(uint32* value);
237   // Read an unsigned integer with Varint encoding.
238   bool ReadVarint64(uint64* value);
239 
240   // Reads a varint off the wire into an "int". This should be used for reading
241   // sizes off the wire (sizes of strings, submessages, bytes fields, etc).
242   //
243   // The value from the wire is interpreted as unsigned.  If its value exceeds
244   // the representable value of an integer on this platform, instead of
245   // truncating we return false. Truncating (as performed by ReadVarint32()
246   // above) is an acceptable approach for fields representing an integer, but
247   // when we are parsing a size from the wire, truncating the value would result
248   // in us misparsing the payload.
249   bool ReadVarintSizeAsInt(int* value);
250 
251   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
252   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
253   // the last tag value, which can be checked with LastTagWas().
254   // Always inline because this is only called in one place per parse loop
255   // but it is called for every iteration of said loop, so it should be fast.
256   // GCC doesn't want to inline this by default.
257   GOOGLE_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag();
258 
259   // This is usually a faster alternative to ReadTag() when cutoff is a manifest
260   // constant.  It does particularly well for cutoff >= 127.  The first part
261   // of the return value is the tag that was read, though it can also be 0 in
262   // the cases where ReadTag() would return 0.  If the second part is true
263   // then the tag is known to be in [0, cutoff].  If not, the tag either is
264   // above cutoff or is 0.  (There's intentional wiggle room when tag is 0,
265   // because that can arise in several ways, and for best performance we want
266   // to avoid an extra "is tag == 0?" check here.)
267   GOOGLE_ATTRIBUTE_ALWAYS_INLINE std::pair<uint32, bool> ReadTagWithCutoff(
268       uint32 cutoff);
269 
270   // Usually returns true if calling ReadVarint32() now would produce the given
271   // value.  Will always return false if ReadVarint32() would not return the
272   // given value.  If ExpectTag() returns true, it also advances past
273   // the varint.  For best performance, use a compile-time constant as the
274   // parameter.
275   // Always inline because this collapses to a small number of instructions
276   // when given a constant parameter, but GCC doesn't want to inline by default.
277   GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected);
278 
279   // Like above, except this reads from the specified buffer. The caller is
280   // responsible for ensuring that the buffer is large enough to read a varint
281   // of the expected size. For best performance, use a compile-time constant as
282   // the expected tag parameter.
283   //
284   // Returns a pointer beyond the expected tag if it was found, or NULL if it
285   // was not.
286   GOOGLE_ATTRIBUTE_ALWAYS_INLINE static const uint8* ExpectTagFromArray(
287       const uint8* buffer,
288       uint32 expected);
289 
290   // Usually returns true if no more bytes can be read.  Always returns false
291   // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
292   // call to LastTagWas() will act as if ReadTag() had been called and returned
293   // zero, and ConsumedEntireMessage() will return true.
294   bool ExpectAtEnd();
295 
296   // If the last call to ReadTag() or ReadTagWithCutoff() returned the
297   // given value, returns true.  Otherwise, returns false.
298   //
299   // This is needed because parsers for some types of embedded messages
300   // (with field type TYPE_GROUP) don't actually know that they've reached the
301   // end of a message until they see an ENDGROUP tag, which was actually part
302   // of the enclosing message.  The enclosing message would like to check that
303   // tag to make sure it had the right number, so it calls LastTagWas() on
304   // return from the embedded parser to check.
305   bool LastTagWas(uint32 expected);
306 
307   // When parsing a message (but NOT a group), this method must be called
308   // immediately after MergeFromCodedStream() returns (if it returns true)
309   // to further verify that the message ended in a legitimate way.  For
310   // example, this verifies that parsing did not end on an end-group tag.
311   // It also checks for some cases where, due to optimizations,
312   // MergeFromCodedStream() can incorrectly return true.
313   bool ConsumedEntireMessage();
314 
315   // Limits ----------------------------------------------------------
316   // Limits are used when parsing length-delimited embedded messages.
317   // After the message's length is read, PushLimit() is used to prevent
318   // the CodedInputStream from reading beyond that length.  Once the
319   // embedded message has been parsed, PopLimit() is called to undo the
320   // limit.
321 
322   // Opaque type used with PushLimit() and PopLimit().  Do not modify
323   // values of this type yourself.  The only reason that this isn't a
324   // struct with private internals is for efficiency.
325   typedef int Limit;
326 
327   // Places a limit on the number of bytes that the stream may read,
328   // starting from the current position.  Once the stream hits this limit,
329   // it will act like the end of the input has been reached until PopLimit()
330   // is called.
331   //
332   // As the names imply, the stream conceptually has a stack of limits.  The
333   // shortest limit on the stack is always enforced, even if it is not the
334   // top limit.
335   //
336   // The value returned by PushLimit() is opaque to the caller, and must
337   // be passed unchanged to the corresponding call to PopLimit().
338   Limit PushLimit(int byte_limit);
339 
340   // Pops the last limit pushed by PushLimit().  The input must be the value
341   // returned by that call to PushLimit().
342   void PopLimit(Limit limit);
343 
344   // Returns the number of bytes left until the nearest limit on the
345   // stack is hit, or -1 if no limits are in place.
346   int BytesUntilLimit() const;
347 
348   // Returns current position relative to the beginning of the input stream.
349   int CurrentPosition() const;
350 
351   // Total Bytes Limit -----------------------------------------------
352   // To prevent malicious users from sending excessively large messages
353   // and causing integer overflows or memory exhaustion, CodedInputStream
354   // imposes a hard limit on the total number of bytes it will read.
355 
356   // Sets the maximum number of bytes that this CodedInputStream will read
357   // before refusing to continue.  To prevent integer overflows in the
358   // protocol buffers implementation, as well as to prevent servers from
359   // allocating enormous amounts of memory to hold parsed messages, the
360   // maximum message length should be limited to the shortest length that
361   // will not harm usability.  The theoretical shortest message that could
362   // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
363   // should set shorter limits if possible.  If warning_threshold is not -1,
364   // a warning will be printed to stderr after warning_threshold bytes are
365   // read.  For backwards compatibility all negative values get squashed to -1,
366   // as other negative values might have special internal meanings.
367   // An error will always be printed to stderr if the limit is reached.
368   //
369   // This is unrelated to PushLimit()/PopLimit().
370   //
371   // Hint:  If you are reading this because your program is printing a
372   //   warning about dangerously large protocol messages, you may be
373   //   confused about what to do next.  The best option is to change your
374   //   design such that excessively large messages are not necessary.
375   //   For example, try to design file formats to consist of many small
376   //   messages rather than a single large one.  If this is infeasible,
377   //   you will need to increase the limit.  Chances are, though, that
378   //   your code never constructs a CodedInputStream on which the limit
379   //   can be set.  You probably parse messages by calling things like
380   //   Message::ParseFromString().  In this case, you will need to change
381   //   your code to instead construct some sort of ZeroCopyInputStream
382   //   (e.g. an ArrayInputStream), construct a CodedInputStream around
383   //   that, then call Message::ParseFromCodedStream() instead.  Then
384   //   you can adjust the limit.  Yes, it's more work, but you're doing
385   //   something unusual.
386   void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
387 
388   // The Total Bytes Limit minus the Current Position, or -1 if there
389   // is no Total Bytes Limit.
390   int BytesUntilTotalBytesLimit() const;
391 
392   // Recursion Limit -------------------------------------------------
393   // To prevent corrupt or malicious messages from causing stack overflows,
394   // we must keep track of the depth of recursion when parsing embedded
395   // messages and groups.  CodedInputStream keeps track of this because it
396   // is the only object that is passed down the stack during parsing.
397 
398   // Sets the maximum recursion depth.  The default is 100.
399   void SetRecursionLimit(int limit);
400 
401 
402   // Increments the current recursion depth.  Returns true if the depth is
403   // under the limit, false if it has gone over.
404   bool IncrementRecursionDepth();
405 
406   // Decrements the recursion depth if possible.
407   void DecrementRecursionDepth();
408 
409   // Decrements the recursion depth blindly.  This is faster than
410   // DecrementRecursionDepth().  It should be used only if all previous
411   // increments to recursion depth were successful.
412   void UnsafeDecrementRecursionDepth();
413 
414   // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
415   // Using this can reduce code size and complexity in some cases.  The caller
416   // is expected to check that the second part of the result is non-negative (to
417   // bail out if the depth of recursion is too high) and, if all is well, to
418   // later pass the first part of the result to PopLimit() or similar.
419   std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
420       int byte_limit);
421 
422   // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
423   Limit ReadLengthAndPushLimit();
424 
425   // Helper that is equivalent to: {
426   //  bool result = ConsumedEntireMessage();
427   //  PopLimit(limit);
428   //  UnsafeDecrementRecursionDepth();
429   //  return result; }
430   // Using this can reduce code size and complexity in some cases.
431   // Do not use unless the current recursion depth is greater than zero.
432   bool DecrementRecursionDepthAndPopLimit(Limit limit);
433 
434   // Helper that is equivalent to: {
435   //  bool result = ConsumedEntireMessage();
436   //  PopLimit(limit);
437   //  return result; }
438   // Using this can reduce code size and complexity in some cases.
439   bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
440 
441   // Extension Registry ----------------------------------------------
442   // ADVANCED USAGE:  99.9% of people can ignore this section.
443   //
444   // By default, when parsing extensions, the parser looks for extension
445   // definitions in the pool which owns the outer message's Descriptor.
446   // However, you may call SetExtensionRegistry() to provide an alternative
447   // pool instead.  This makes it possible, for example, to parse a message
448   // using a generated class, but represent some extensions using
449   // DynamicMessage.
450 
451   // Set the pool used to look up extensions.  Most users do not need to call
452   // this as the correct pool will be chosen automatically.
453   //
454   // WARNING:  It is very easy to misuse this.  Carefully read the requirements
455   //   below.  Do not use this unless you are sure you need it.  Almost no one
456   //   does.
457   //
458   // Let's say you are parsing a message into message object m, and you want
459   // to take advantage of SetExtensionRegistry().  You must follow these
460   // requirements:
461   //
462   // The given DescriptorPool must contain m->GetDescriptor().  It is not
463   // sufficient for it to simply contain a descriptor that has the same name
464   // and content -- it must be the *exact object*.  In other words:
465   //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
466   //          m->GetDescriptor());
467   // There are two ways to satisfy this requirement:
468   // 1) Use m->GetDescriptor()->pool() as the pool.  This is generally useless
469   //    because this is the pool that would be used anyway if you didn't call
470   //    SetExtensionRegistry() at all.
471   // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
472   //    "underlay".  Read the documentation for DescriptorPool for more
473   //    information about underlays.
474   //
475   // You must also provide a MessageFactory.  This factory will be used to
476   // construct Message objects representing extensions.  The factory's
477   // GetPrototype() MUST return non-NULL for any Descriptor which can be found
478   // through the provided pool.
479   //
480   // If the provided factory might return instances of protocol-compiler-
481   // generated (i.e. compiled-in) types, or if the outer message object m is
482   // a generated type, then the given factory MUST have this property:  If
483   // GetPrototype() is given a Descriptor which resides in
484   // DescriptorPool::generated_pool(), the factory MUST return the same
485   // prototype which MessageFactory::generated_factory() would return.  That
486   // is, given a descriptor for a generated type, the factory must return an
487   // instance of the generated class (NOT DynamicMessage).  However, when
488   // given a descriptor for a type that is NOT in generated_pool, the factory
489   // is free to return any implementation.
490   //
491   // The reason for this requirement is that generated sub-objects may be
492   // accessed via the standard (non-reflection) extension accessor methods,
493   // and these methods will down-cast the object to the generated class type.
494   // If the object is not actually of that type, the results would be undefined.
495   // On the other hand, if an extension is not compiled in, then there is no
496   // way the code could end up accessing it via the standard accessors -- the
497   // only way to access the extension is via reflection.  When using reflection,
498   // DynamicMessage and generated messages are indistinguishable, so it's fine
499   // if these objects are represented using DynamicMessage.
500   //
501   // Using DynamicMessageFactory on which you have called
502   // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
503   // above requirement.
504   //
505   // If either pool or factory is NULL, both must be NULL.
506   //
507   // Note that this feature is ignored when parsing "lite" messages as they do
508   // not have descriptors.
509   void SetExtensionRegistry(const DescriptorPool* pool,
510                             MessageFactory* factory);
511 
512   // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
513   // has been provided.
514   const DescriptorPool* GetExtensionPool();
515 
516   // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
517   // factory has been provided.
518   MessageFactory* GetExtensionFactory();
519 
520  private:
521   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);  // presumably forbids copying -- confirm
522 
523   const uint8* buffer_;         // presumably the current read position -- confirm
524   const uint8* buffer_end_;     // pointer to the end of the buffer.
525   ZeroCopyInputStream* input_;  // underlying stream being read
526   int total_bytes_read_;  // total bytes read from input_, including
527                           // the current buffer
528 
529   // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
530   // so that we can BackUp() on destruction.
531   int overflow_bytes_;
532 
533   // LastTagWas() stuff.
534   uint32 last_tag_;         // result of last ReadTag() or ReadTagWithCutoff().
535 
536   // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
537   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
538   // reach the end of a message and attempt to read another tag.
539   bool legitimate_message_end_;
540 
541   // See EnableAliasing() (NOTE(review): not declared in this class).
542   bool aliasing_enabled_;
543 
544   // Limits
545   Limit current_limit_;   // if position = -1, no limit is applied
546 
547   // For simplicity, if the current buffer crosses a limit (either a normal
548   // limit created by PushLimit() or the total bytes limit), buffer_size_
549   // only tracks the number of bytes before that limit.  This field
550   // contains the number of bytes after it.  Note that this implies that if
551   // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
552   // hit a limit.  However, if both are zero, it doesn't necessarily mean
553   // we aren't at a limit -- the buffer may have ended exactly at the limit.
554   int buffer_size_after_limit_;
555 
556   // Maximum number of bytes to read, period.  This is unrelated to
557   // current_limit_.  Set using SetTotalBytesLimit().
558   int total_bytes_limit_;
559 
560   // If positive/0: Limit for bytes read after which a warning due to size
561   // should be logged.
562   // If -1: Printing of warning disabled. Can be set by client.
563   // If -2: Internal: Limit has been reached, print full size when destructing.
564   int total_bytes_warning_threshold_;
565 
566   // Current recursion budget, controlled by IncrementRecursionDepth() and
567   // similar.  Starts at recursion_limit_ and goes down: if this reaches
568   // -1 we are over budget.
569   int recursion_budget_;
570   // Recursion depth limit, set by SetRecursionLimit().
571   int recursion_limit_;
572 
573   // See SetExtensionRegistry().
574   const DescriptorPool* extension_pool_;
575   MessageFactory* extension_factory_;
576 
577   // Private member functions.
578 
579   // Advance the buffer by a given number of bytes.
580   void Advance(int amount);
581 
582   // Back up input_ to the current buffer position.
583   void BackUpInputToCurrentPosition();
584 
585   // Recomputes the value of buffer_size_after_limit_.  Must be called after
586   // current_limit_ or total_bytes_limit_ changes.
587   void RecomputeBufferLimits();
588 
589   // Writes an error message saying that we hit total_bytes_limit_.
590   void PrintTotalBytesLimitError();
591 
592   // Called when the buffer runs out to request more data.  Implies an
593   // Advance(BufferSize()).
594   bool Refresh();
595 
596   // When parsing varints, we optimize for the common case of small values, and
597   // then optimize for the case when the varint fits within the current buffer
598   // piece. The Fallback method is used when we can't use the one-byte
599   // optimization. The Slow method is yet another fallback when the buffer is
600   // not large enough. Making the slow path out-of-line speeds up the common
601   // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
602   // message crosses multiple buffers.  Note: ReadVarint32Fallback() and
603   // ReadVarint64Fallback() are called frequently and generally not inlined, so
604   // they have been optimized to avoid "out" parameters.  The former returns -1
605   // if it fails and the uint32 it read otherwise.  The latter has a bool
606   // indicating success or failure as part of its return type.
607   int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
608   int ReadVarintSizeAsIntFallback();
609   std::pair<uint64, bool> ReadVarint64Fallback();
610   bool ReadVarint32Slow(uint32* value);
611   bool ReadVarint64Slow(uint64* value);
612   int ReadVarintSizeAsIntSlow();
613   bool ReadLittleEndian32Fallback(uint32* value);
614   bool ReadLittleEndian64Fallback(uint64* value);
615   // Fallback/slow methods for reading tags. These do not update last_tag_,
616   // but will set legitimate_message_end_ if we are at the end of the input
617   // stream.
618   uint32 ReadTagFallback(uint32 first_byte_or_zero);
619   uint32 ReadTagSlow();
620   bool ReadStringFallback(string* buffer, int size);
621 
622   // Return the size of the buffer.
623   int BufferSize() const;
624 
625   static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
626 
627   static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
628 
629   static int default_recursion_limit_;  // 100 by default.
630 };
631 
632 // Class which encodes and writes binary data which is composed of varint-
633 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
634 // Most users will not need to deal with CodedOutputStream.
635 //
636 // Most methods of CodedOutputStream which return a bool return false if an
637 // underlying I/O error occurs.  Once such a failure occurs, the
638 // CodedOutputStream is broken and is no longer useful. The Write* methods do
639 // not return the stream status, but will invalidate the stream if an error
640 // occurs. The client can probe HadError() to determine the status.
641 //
642 // Note that every method of CodedOutputStream which writes some data has
643 // a corresponding static "ToArray" version. These versions write directly
644 // to the provided buffer, returning a pointer past the last written byte.
645 // They require that the buffer has sufficient capacity for the encoded data.
646 // This allows an optimization where we check if an output stream has enough
647 // space for an entire message before we start writing and, if there is, we
648 // call only the ToArray methods to avoid doing bound checks for each
649 // individual value.
650 // i.e., in the example above:
651 //
//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
653 //   int magic_number = 1234;
654 //   char text[] = "Hello world!";
655 //
656 //   int coded_size = sizeof(magic_number) +
657 //                    CodedOutputStream::VarintSize32(strlen(text)) +
658 //                    strlen(text);
659 //
660 //   uint8* buffer =
661 //       coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
662 //   if (buffer != NULL) {
663 //     // The output stream has enough space in the buffer: write directly to
664 //     // the array.
665 //     buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
666 //                                                            buffer);
667 //     buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
668 //     buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
669 //   } else {
670 //     // Make bound-checked writes, which will ask the underlying stream for
671 //     // more space as needed.
672 //     coded_output->WriteLittleEndian32(magic_number);
673 //     coded_output->WriteVarint32(strlen(text));
674 //     coded_output->WriteRaw(text, strlen(text));
675 //   }
676 //
677 //   delete coded_output;
678 class LIBPROTOBUF_EXPORT CodedOutputStream {
679  public:
680   // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
681   explicit CodedOutputStream(ZeroCopyOutputStream* output);
682   CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
683 
684   // Destroy the CodedOutputStream and position the underlying
685   // ZeroCopyOutputStream immediately after the last byte written.
686   ~CodedOutputStream();
687 
688   // Trims any unused space in the underlying buffer so that its size matches
689   // the number of bytes written by this stream. The underlying buffer will
690   // automatically be trimmed when this stream is destroyed; this call is only
691   // necessary if the underlying buffer is accessed *before* the stream is
692   // destroyed.
693   void Trim();
694 
695   // Skips a number of bytes, leaving the bytes unmodified in the underlying
696   // buffer.  Returns false if an underlying write error occurs.  This is
697   // mainly useful with GetDirectBufferPointer().
698   bool Skip(int count);
699 
700   // Sets *data to point directly at the unwritten part of the
701   // CodedOutputStream's underlying buffer, and *size to the size of that
702   // buffer, but does not advance the stream's current position.  This will
703   // always either produce a non-empty buffer or return false.  If the caller
704   // writes any data to this buffer, it should then call Skip() to skip over
705   // the consumed bytes.  This may be useful for implementing external fast
706   // serialization routines for types of data not covered by the
707   // CodedOutputStream interface.
708   bool GetDirectBufferPointer(void** data, int* size);
709 
710   // If there are at least "size" bytes available in the current buffer,
711   // returns a pointer directly into the buffer and advances over these bytes.
712   // The caller may then write directly into this buffer (e.g. using the
713   // *ToArray static methods) rather than go through CodedOutputStream.  If
714   // there are not enough bytes available, returns NULL.  The return pointer is
715   // invalidated as soon as any other non-const method of CodedOutputStream
716   // is called.
717   inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
718 
719   // Write raw bytes, copying them from the given buffer.
720   void WriteRaw(const void* buffer, int size);
721   // Like WriteRaw()  but will try to write aliased data if aliasing is
722   // turned on.
723   void WriteRawMaybeAliased(const void* data, int size);
724   // Like WriteRaw()  but writing directly to the target array.
725   // This is _not_ inlined, as the compiler often optimizes memcpy into inline
726   // copy loops. Since this gets called by every field with string or bytes
727   // type, inlining may lead to a significant amount of code bloat, with only a
728   // minor performance gain.
729   static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
730 
731   // Equivalent to WriteRaw(str.data(), str.size()).
732   void WriteString(const string& str);
733   // Like WriteString()  but writing directly to the target array.
734   static uint8* WriteStringToArray(const string& str, uint8* target);
735   // Write the varint-encoded size of str followed by str.
736   static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
737 
738 
739   // Instructs the CodedOutputStream to allow the underlying
740   // ZeroCopyOutputStream to hold pointers to the original structure instead of
741   // copying, if it supports it (i.e. output->AllowsAliasing() is true).  If the
742   // underlying stream does not support aliasing, then enabling it has no
743   // affect.  For now, this only affects the behavior of
744   // WriteRawMaybeAliased().
745   //
746   // NOTE: It is caller's responsibility to ensure that the chunk of memory
747   // remains live until all of the data has been consumed from the stream.
748   void EnableAliasing(bool enabled);
749 
750   // Write a 32-bit little-endian integer.
751   void WriteLittleEndian32(uint32 value);
752   // Like WriteLittleEndian32()  but writing directly to the target array.
753   static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
754   // Write a 64-bit little-endian integer.
755   void WriteLittleEndian64(uint64 value);
756   // Like WriteLittleEndian64()  but writing directly to the target array.
757   static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
758 
759   // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
760   // is equivalent to casting it to uint64 and writing it as a 64-bit value,
761   // but may be more efficient.
762   void WriteVarint32(uint32 value);
763   // Like WriteVarint32()  but writing directly to the target array.
764   static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
765   // Write an unsigned integer with Varint encoding.
766   void WriteVarint64(uint64 value);
767   // Like WriteVarint64()  but writing directly to the target array.
768   static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
769 
770   // Equivalent to WriteVarint32() except when the value is negative,
771   // in which case it must be sign-extended to a full 10 bytes.
772   void WriteVarint32SignExtended(int32 value);
773   // Like WriteVarint32SignExtended()  but writing directly to the target array.
774   static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
775 
776   // This is identical to WriteVarint32(), but optimized for writing tags.
777   // In particular, if the input is a compile-time constant, this method
778   // compiles down to a couple instructions.
779   // Always inline because otherwise the aformentioned optimization can't work,
780   // but GCC by default doesn't want to inline this.
781   void WriteTag(uint32 value);
782   // Like WriteTag()  but writing directly to the target array.
783   GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteTagToArray(uint32 value,
784                                                         uint8* target);
785 
786   // Returns the number of bytes needed to encode the given value as a varint.
787   static int VarintSize32(uint32 value);
788   // Returns the number of bytes needed to encode the given value as a varint.
789   static int VarintSize64(uint64 value);
790 
791   // If negative, 10 bytes.  Otheriwse, same as VarintSize32().
792   static int VarintSize32SignExtended(int32 value);
793 
794   // Compile-time equivalent of VarintSize32().
795   template <uint32 Value>
796   struct StaticVarintSize32 {
797     static const int value =
798         (Value < (1 << 7))
799             ? 1
800             : (Value < (1 << 14))
801                 ? 2
802                 : (Value < (1 << 21))
803                     ? 3
804                     : (Value < (1 << 28))
805                         ? 4
806                         : 5;
807   };
808 
809   // Returns the total number of bytes written since this object was created.
810   inline int ByteCount() const;
811 
812   // Returns true if there was an underlying I/O error since this object was
813   // created.
HadError()814   bool HadError() const { return had_error_; }
815 
816   // Deterministic serialization, if requested, guarantees that for a given
817   // binary, equal messages will always be serialized to the same bytes. This
818   // implies:
819   //   . repeated serialization of a message will return the same bytes
820   //   . different processes of the same binary (which may be executing on
821   //     different machines) will serialize equal messages to the same bytes.
822   //
823   // Note the deterministic serialization is NOT canonical across languages; it
824   // is also unstable across different builds with schema changes due to unknown
825   // fields. Users who need canonical serialization, e.g., persistent storage in
826   // a canonical form, fingerprinting, etc., should define their own
827   // canonicalization specification and implement the serializer using
828   // reflection APIs rather than relying on this API.
829   //
830   // If determinisitc serialization is requested, the serializer will
831   // sort map entries by keys in lexicographical order or numerical order.
832   // (This is an implementation detail and may subject to change.)
833   //
834   // There are two ways to determine whether serialization should be
835   // deterministic for this CodedOutputStream.  If SetSerializationDeterministic
836   // has not yet been called, then the default comes from the global default,
837   // which is false, until SetDefaultSerializationDeterministic has been called.
838   // Otherwise, SetSerializationDeterministic has been called, and the last
839   // value passed to it is all that matters.
SetSerializationDeterministic(bool value)840   void SetSerializationDeterministic(bool value) {
841     serialization_deterministic_is_overridden_ = true;
842     serialization_deterministic_override_ = value;
843   }
844   // See above.  Also, note that users of this CodedOutputStream may need to
845   // call IsSerializationDeterminstic() to serialize in the intended way.  This
846   // CodedOutputStream cannot enforce a desire for deterministic serialization
847   // by itself.
IsSerializationDeterminstic()848   bool IsSerializationDeterminstic() const {
849     return serialization_deterministic_is_overridden_ ?
850         serialization_deterministic_override_ :
851         default_serialization_deterministic_;
852   }
853 
854  private:
855   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
856 
857   ZeroCopyOutputStream* output_;
858   uint8* buffer_;
859   int buffer_size_;
860   int total_bytes_;  // Sum of sizes of all buffers seen so far.
861   bool had_error_;   // Whether an error occurred during output.
862   bool aliasing_enabled_;  // See EnableAliasing().
863   // See SetSerializationDeterministic() regarding these three fields.
864   bool serialization_deterministic_is_overridden_;
865   bool serialization_deterministic_override_;
866   static bool default_serialization_deterministic_;
867 
868   // Advance the buffer by a given number of bytes.
869   void Advance(int amount);
870 
871   // Called when the buffer runs out to request more data.  Implies an
872   // Advance(buffer_size_).
873   bool Refresh();
874 
875   // Like WriteRaw() but may avoid copying if the underlying
876   // ZeroCopyOutputStream supports it.
877   void WriteAliasedRaw(const void* buffer, int size);
878 
879   // If this write might cross the end of the buffer, we compose the bytes first
880   // then use WriteRaw().
881   void WriteVarint32SlowPath(uint32 value);
882 
883   // Always-inlined versions of WriteVarint* functions so that code can be
884   // reused, while still controlling size. For instance, WriteVarint32ToArray()
885   // should not directly call this: since it is inlined itself, doing so
886   // would greatly increase the size of generated code. Instead, it should call
887   // WriteVarint32FallbackToArray.  Meanwhile, WriteVarint32() is already
888   // out-of-line, so it should just invoke this directly to avoid any extra
889   // function call overhead.
890   GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteVarint64ToArrayInline(
891       uint64 value, uint8* target);
892 
893   static int VarintSize32Fallback(uint32 value);
894 
895   // See above.  Other projects may use "friend" to allow them to call this.
SetDefaultSerializationDeterministic()896   static void SetDefaultSerializationDeterministic() {
897     default_serialization_deterministic_ = true;
898   }
899 };
900 
901 // inline methods ====================================================
902 // The vast majority of varints are only one byte.  These inline
903 // methods optimize for that case.
904 
ReadVarint32(uint32 * value)905 inline bool CodedInputStream::ReadVarint32(uint32* value) {
906   uint32 v = 0;
907   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
908     v = *buffer_;
909     if (v < 0x80) {
910       *value = v;
911       Advance(1);
912       return true;
913     }
914   }
915   int64 result = ReadVarint32Fallback(v);
916   *value = static_cast<uint32>(result);
917   return result >= 0;
918 }
919 
ReadVarint64(uint64 * value)920 inline bool CodedInputStream::ReadVarint64(uint64* value) {
921   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
922     *value = *buffer_;
923     Advance(1);
924     return true;
925   }
926   std::pair<uint64, bool> p = ReadVarint64Fallback();
927   *value = p.first;
928   return p.second;
929 }
930 
ReadVarintSizeAsInt(int * value)931 inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) {
932   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
933     int v = *buffer_;
934     if (v < 0x80) {
935       *value = v;
936       Advance(1);
937       return true;
938     }
939   }
940   *value = ReadVarintSizeAsIntFallback();
941   return *value >= 0;
942 }
943 
944 // static
ReadLittleEndian32FromArray(const uint8 * buffer,uint32 * value)945 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
946     const uint8* buffer,
947     uint32* value) {
948 #if defined(PROTOBUF_LITTLE_ENDIAN)
949   memcpy(value, buffer, sizeof(*value));
950   return buffer + sizeof(*value);
951 #else
952   *value = (static_cast<uint32>(buffer[0])      ) |
953            (static_cast<uint32>(buffer[1]) <<  8) |
954            (static_cast<uint32>(buffer[2]) << 16) |
955            (static_cast<uint32>(buffer[3]) << 24);
956   return buffer + sizeof(*value);
957 #endif
958 }
959 // static
ReadLittleEndian64FromArray(const uint8 * buffer,uint64 * value)960 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
961     const uint8* buffer,
962     uint64* value) {
963 #if defined(PROTOBUF_LITTLE_ENDIAN)
964   memcpy(value, buffer, sizeof(*value));
965   return buffer + sizeof(*value);
966 #else
967   uint32 part0 = (static_cast<uint32>(buffer[0])      ) |
968                  (static_cast<uint32>(buffer[1]) <<  8) |
969                  (static_cast<uint32>(buffer[2]) << 16) |
970                  (static_cast<uint32>(buffer[3]) << 24);
971   uint32 part1 = (static_cast<uint32>(buffer[4])      ) |
972                  (static_cast<uint32>(buffer[5]) <<  8) |
973                  (static_cast<uint32>(buffer[6]) << 16) |
974                  (static_cast<uint32>(buffer[7]) << 24);
975   *value = static_cast<uint64>(part0) |
976           (static_cast<uint64>(part1) << 32);
977   return buffer + sizeof(*value);
978 #endif
979 }
980 
ReadLittleEndian32(uint32 * value)981 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
982 #if defined(PROTOBUF_LITTLE_ENDIAN)
983   if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
984     memcpy(value, buffer_, sizeof(*value));
985     Advance(sizeof(*value));
986     return true;
987   } else {
988     return ReadLittleEndian32Fallback(value);
989   }
990 #else
991   return ReadLittleEndian32Fallback(value);
992 #endif
993 }
994 
ReadLittleEndian64(uint64 * value)995 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
996 #if defined(PROTOBUF_LITTLE_ENDIAN)
997   if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
998     memcpy(value, buffer_, sizeof(*value));
999     Advance(sizeof(*value));
1000     return true;
1001   } else {
1002     return ReadLittleEndian64Fallback(value);
1003   }
1004 #else
1005   return ReadLittleEndian64Fallback(value);
1006 #endif
1007 }
1008 
ReadTag()1009 inline uint32 CodedInputStream::ReadTag() {
1010   uint32 v = 0;
1011   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
1012     v = *buffer_;
1013     if (v < 0x80) {
1014       last_tag_ = v;
1015       Advance(1);
1016       return v;
1017     }
1018   }
1019   last_tag_ = ReadTagFallback(v);
1020   return last_tag_;
1021 }
1022 
ReadTagWithCutoff(uint32 cutoff)1023 inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
1024     uint32 cutoff) {
1025   // In performance-sensitive code we can expect cutoff to be a compile-time
1026   // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
1027   // compile time.
1028   uint32 first_byte_or_zero = 0;
1029   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
1030     // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
1031     // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
1032     // is large enough then is it better to check for the two-byte case first?
1033     first_byte_or_zero = buffer_[0];
1034     if (static_cast<int8>(buffer_[0]) > 0) {
1035       const uint32 kMax1ByteVarint = 0x7f;
1036       uint32 tag = last_tag_ = buffer_[0];
1037       Advance(1);
1038       return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
1039     }
1040     // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
1041     // and tag is two bytes.  The latter is tested by bitwise-and-not of the
1042     // first byte and the second byte.
1043     if (cutoff >= 0x80 &&
1044         GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
1045         GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
1046       const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
1047       uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
1048       Advance(2);
1049       // It might make sense to test for tag == 0 now, but it is so rare that
1050       // that we don't bother.  A varint-encoded 0 should be one byte unless
1051       // the encoder lost its mind.  The second part of the return value of
1052       // this function is allowed to be either true or false if the tag is 0,
1053       // so we don't have to check for tag == 0.  We may need to check whether
1054       // it exceeds cutoff.
1055       bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
1056       return std::make_pair(tag, at_or_below_cutoff);
1057     }
1058   }
1059   // Slow path
1060   last_tag_ = ReadTagFallback(first_byte_or_zero);
1061   return std::make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
1062 }
1063 
LastTagWas(uint32 expected)1064 inline bool CodedInputStream::LastTagWas(uint32 expected) {
1065   return last_tag_ == expected;
1066 }
1067 
ConsumedEntireMessage()1068 inline bool CodedInputStream::ConsumedEntireMessage() {
1069   return legitimate_message_end_;
1070 }
1071 
ExpectTag(uint32 expected)1072 inline bool CodedInputStream::ExpectTag(uint32 expected) {
1073   if (expected < (1 << 7)) {
1074     if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
1075       Advance(1);
1076       return true;
1077     } else {
1078       return false;
1079     }
1080   } else if (expected < (1 << 14)) {
1081     if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
1082         buffer_[0] == static_cast<uint8>(expected | 0x80) &&
1083         buffer_[1] == static_cast<uint8>(expected >> 7)) {
1084       Advance(2);
1085       return true;
1086     } else {
1087       return false;
1088     }
1089   } else {
1090     // Don't bother optimizing for larger values.
1091     return false;
1092   }
1093 }
1094 
// Array counterpart of ExpectTag(): if `buffer` starts with the one- or
// two-byte varint encoding of `expected`, returns a pointer just past it;
// otherwise returns NULL.  No bounds checking is done on `buffer`.
inline const uint8* CodedInputStream::ExpectTagFromArray(
    const uint8* buffer, uint32 expected) {
  if (expected < (1 << 7)) {
    // One-byte encoding.
    if (buffer[0] != expected) return NULL;
    return buffer + 1;
  }

  if (expected < (1 << 14)) {
    // Two-byte encoding: low seven bits plus continuation bit, then the rest.
    const bool matches = buffer[0] == static_cast<uint8>(expected | 0x80) &&
                         buffer[1] == static_cast<uint8>(expected >> 7);
    if (!matches) return NULL;
    return buffer + 2;
  }

  // Tags needing three or more bytes are not handled here.
  return NULL;
}
1109 
GetDirectBufferPointerInline(const void ** data,int * size)1110 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
1111                                                            int* size) {
1112   *data = buffer_;
1113   *size = static_cast<int>(buffer_end_ - buffer_);
1114 }
1115 
ExpectAtEnd()1116 inline bool CodedInputStream::ExpectAtEnd() {
1117   // If we are at a limit we know no more bytes can be read.  Otherwise, it's
1118   // hard to say without calling Refresh(), and we'd rather not do that.
1119 
1120   if (buffer_ == buffer_end_ &&
1121       ((buffer_size_after_limit_ != 0) ||
1122        (total_bytes_read_ == current_limit_))) {
1123     last_tag_ = 0;                   // Pretend we called ReadTag()...
1124     legitimate_message_end_ = true;  // ... and it hit EOF.
1125     return true;
1126   } else {
1127     return false;
1128   }
1129 }
1130 
CurrentPosition()1131 inline int CodedInputStream::CurrentPosition() const {
1132   return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
1133 }
1134 
GetDirectBufferForNBytesAndAdvance(int size)1135 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
1136   if (buffer_size_ < size) {
1137     return NULL;
1138   } else {
1139     uint8* result = buffer_;
1140     Advance(size);
1141     return result;
1142   }
1143 }
1144 
WriteVarint32ToArray(uint32 value,uint8 * target)1145 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
1146                                                       uint8* target) {
1147   while (value >= 0x80) {
1148     *target = static_cast<uint8>(value | 0x80);
1149     value >>= 7;
1150     ++target;
1151   }
1152   *target = static_cast<uint8>(value);
1153   return target + 1;
1154 }
1155 
WriteVarint32SignExtended(int32 value)1156 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
1157   if (value < 0) {
1158     WriteVarint64(static_cast<uint64>(value));
1159   } else {
1160     WriteVarint32(static_cast<uint32>(value));
1161   }
1162 }
1163 
WriteVarint32SignExtendedToArray(int32 value,uint8 * target)1164 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
1165     int32 value, uint8* target) {
1166   if (value < 0) {
1167     return WriteVarint64ToArray(static_cast<uint64>(value), target);
1168   } else {
1169     return WriteVarint32ToArray(static_cast<uint32>(value), target);
1170   }
1171 }
1172 
WriteLittleEndian32ToArray(uint32 value,uint8 * target)1173 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
1174                                                             uint8* target) {
1175 #if defined(PROTOBUF_LITTLE_ENDIAN)
1176   memcpy(target, &value, sizeof(value));
1177 #else
1178   target[0] = static_cast<uint8>(value);
1179   target[1] = static_cast<uint8>(value >>  8);
1180   target[2] = static_cast<uint8>(value >> 16);
1181   target[3] = static_cast<uint8>(value >> 24);
1182 #endif
1183   return target + sizeof(value);
1184 }
1185 
WriteLittleEndian64ToArray(uint64 value,uint8 * target)1186 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
1187                                                             uint8* target) {
1188 #if defined(PROTOBUF_LITTLE_ENDIAN)
1189   memcpy(target, &value, sizeof(value));
1190 #else
1191   uint32 part0 = static_cast<uint32>(value);
1192   uint32 part1 = static_cast<uint32>(value >> 32);
1193 
1194   target[0] = static_cast<uint8>(part0);
1195   target[1] = static_cast<uint8>(part0 >>  8);
1196   target[2] = static_cast<uint8>(part0 >> 16);
1197   target[3] = static_cast<uint8>(part0 >> 24);
1198   target[4] = static_cast<uint8>(part1);
1199   target[5] = static_cast<uint8>(part1 >>  8);
1200   target[6] = static_cast<uint8>(part1 >> 16);
1201   target[7] = static_cast<uint8>(part1 >> 24);
1202 #endif
1203   return target + sizeof(value);
1204 }
1205 
WriteVarint32(uint32 value)1206 inline void CodedOutputStream::WriteVarint32(uint32 value) {
1207   if (buffer_size_ >= 5) {
1208     // Fast path:  We have enough bytes left in the buffer to guarantee that
1209     // this write won't cross the end, so we can skip the checks.
1210     uint8* target = buffer_;
1211     uint8* end = WriteVarint32ToArray(value, target);
1212     int size = static_cast<int>(end - target);
1213     Advance(size);
1214   } else {
1215     WriteVarint32SlowPath(value);
1216   }
1217 }
1218 
WriteTag(uint32 value)1219 inline void CodedOutputStream::WriteTag(uint32 value) {
1220   WriteVarint32(value);
1221 }
1222 
WriteTagToArray(uint32 value,uint8 * target)1223 inline uint8* CodedOutputStream::WriteTagToArray(
1224     uint32 value, uint8* target) {
1225   return WriteVarint32ToArray(value, target);
1226 }
1227 
VarintSize32(uint32 value)1228 inline int CodedOutputStream::VarintSize32(uint32 value) {
1229   if (value < (1 << 7)) {
1230     return 1;
1231   } else  {
1232     return VarintSize32Fallback(value);
1233   }
1234 }
1235 
VarintSize32SignExtended(int32 value)1236 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
1237   if (value < 0) {
1238     return 10;     // TODO(kenton):  Make this a symbolic constant.
1239   } else {
1240     return VarintSize32(static_cast<uint32>(value));
1241   }
1242 }
1243 
WriteString(const string & str)1244 inline void CodedOutputStream::WriteString(const string& str) {
1245   WriteRaw(str.data(), static_cast<int>(str.size()));
1246 }
1247 
WriteRawMaybeAliased(const void * data,int size)1248 inline void CodedOutputStream::WriteRawMaybeAliased(
1249     const void* data, int size) {
1250   if (aliasing_enabled_) {
1251     WriteAliasedRaw(data, size);
1252   } else {
1253     WriteRaw(data, size);
1254   }
1255 }
1256 
WriteStringToArray(const string & str,uint8 * target)1257 inline uint8* CodedOutputStream::WriteStringToArray(
1258     const string& str, uint8* target) {
1259   return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
1260 }
1261 
ByteCount()1262 inline int CodedOutputStream::ByteCount() const {
1263   return total_bytes_ - buffer_size_;
1264 }
1265 
Advance(int amount)1266 inline void CodedInputStream::Advance(int amount) {
1267   buffer_ += amount;
1268 }
1269 
Advance(int amount)1270 inline void CodedOutputStream::Advance(int amount) {
1271   buffer_ += amount;
1272   buffer_size_ -= amount;
1273 }
1274 
SetRecursionLimit(int limit)1275 inline void CodedInputStream::SetRecursionLimit(int limit) {
1276   recursion_budget_ += limit - recursion_limit_;
1277   recursion_limit_ = limit;
1278 }
1279 
IncrementRecursionDepth()1280 inline bool CodedInputStream::IncrementRecursionDepth() {
1281   --recursion_budget_;
1282   return recursion_budget_ >= 0;
1283 }
1284 
DecrementRecursionDepth()1285 inline void CodedInputStream::DecrementRecursionDepth() {
1286   if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
1287 }
1288 
UnsafeDecrementRecursionDepth()1289 inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
1290   assert(recursion_budget_ < recursion_limit_);
1291   ++recursion_budget_;
1292 }
1293 
SetExtensionRegistry(const DescriptorPool * pool,MessageFactory * factory)1294 inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
1295                                                    MessageFactory* factory) {
1296   extension_pool_ = pool;
1297   extension_factory_ = factory;
1298 }
1299 
GetExtensionPool()1300 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1301   return extension_pool_;
1302 }
1303 
GetExtensionFactory()1304 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1305   return extension_factory_;
1306 }
1307 
BufferSize()1308 inline int CodedInputStream::BufferSize() const {
1309   return static_cast<int>(buffer_end_ - buffer_);
1310 }
1311 
CodedInputStream(ZeroCopyInputStream * input)1312 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1313   : buffer_(NULL),
1314     buffer_end_(NULL),
1315     input_(input),
1316     total_bytes_read_(0),
1317     overflow_bytes_(0),
1318     last_tag_(0),
1319     legitimate_message_end_(false),
1320     aliasing_enabled_(false),
1321     current_limit_(kint32max),
1322     buffer_size_after_limit_(0),
1323     total_bytes_limit_(kDefaultTotalBytesLimit),
1324     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1325     recursion_budget_(default_recursion_limit_),
1326     recursion_limit_(default_recursion_limit_),
1327     extension_pool_(NULL),
1328     extension_factory_(NULL) {
1329   // Eagerly Refresh() so buffer space is immediately available.
1330   Refresh();
1331 }
1332 
CodedInputStream(const uint8 * buffer,int size)1333 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
1334   : buffer_(buffer),
1335     buffer_end_(buffer + size),
1336     input_(NULL),
1337     total_bytes_read_(size),
1338     overflow_bytes_(0),
1339     last_tag_(0),
1340     legitimate_message_end_(false),
1341     aliasing_enabled_(false),
1342     current_limit_(size),
1343     buffer_size_after_limit_(0),
1344     total_bytes_limit_(kDefaultTotalBytesLimit),
1345     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
1346     recursion_budget_(default_recursion_limit_),
1347     recursion_limit_(default_recursion_limit_),
1348     extension_pool_(NULL),
1349     extension_factory_(NULL) {
1350   // Note that setting current_limit_ == size is important to prevent some
1351   // code paths from trying to access input_ and segfaulting.
1352 }
1353 
IsFlat()1354 inline bool CodedInputStream::IsFlat() const {
1355   return input_ == NULL;
1356 }
1357 
1358 }  // namespace io
1359 }  // namespace protobuf
1360 
1361 
1362 #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
1363   #pragma runtime_checks("c", restore)
1364 #endif  // _MSC_VER && !defined(__INTEL_COMPILER)
1365 
1366 }  // namespace google
1367 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
1368