# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc.  All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Code for decoding protocol buffer primitives.

This code is very similar to encoder.py -- read the docs for that module first.

A "decoder" is a function with the signature:
  Decode(buffer, pos, end, message, field_dict)
The arguments are:
  buffer:     The string containing the encoded message.
  pos:        The current position in the string.
  end:        The position in the string where the current message ends.  May be
              less than len(buffer) if we're reading a sub-message.
  message:    The message object into which we're parsing.
  field_dict: message._fields (avoids a hashtable lookup).
The decoder reads the field and stores it into field_dict, returning the new
buffer position.  A decoder for a repeated field may proactively decode all of
the elements of that field, if they appear consecutively.

Note that decoders may throw any of the following:
  IndexError:  Indicates a truncated message.
  struct.error:  Unpacking of a fixed-width field failed.
  message.DecodeError:  Other errors.

Decoders are expected to raise an exception if they are called with pos > end.
This allows callers to be lax about bounds checking:  it's fine to read past
"end" as long as you are sure that someone else will notice and throw an
exception later on.

Something up the call stack is expected to catch IndexError and struct.error
and convert them to message.DecodeError.

Decoders are constructed using decoder constructors with the signature:
  MakeDecoder(field_number, is_repeated, is_packed, key, new_default)
The arguments are:
  field_number:  The field number of the field we want to decode.
  is_repeated:   Is the field a repeated field? (bool)
  is_packed:     Is the field a packed field? (bool)
  key:           The key to use when looking up the field within field_dict.
                 (This is actually the FieldDescriptor but nothing in this
                 file should depend on that.)
  new_default:   A function which takes a message object as a parameter and
                 returns a new instance of the default value for this field.
                 (This is called for repeated fields and sub-messages, when an
                 instance does not already exist.)

As with encoders, we define a decoder constructor for every type of field.
Then, for every field of every message class we construct an actual decoder.
That decoder goes into a dict indexed by tag, so when we decode a message
we repeatedly read a tag, look up the corresponding decoder, and invoke it.
"""

__author__ = 'kenton@google.com (Kenton Varda)'

import struct
from google.protobuf.internal import encoder
from google.protobuf.internal import wire_format
from google.protobuf import message


# This will overflow and thus become IEEE-754 "infinity".  We would use
# "float('inf')" but it doesn't work on Windows pre-Python-2.6.
_POS_INF = 1e10000
_NEG_INF = -_POS_INF
_NAN = _POS_INF * 0


# This is not for optimization, but rather to avoid conflicts with local
# variables named "message".
_DecodeError = message.DecodeError


def _VarintDecoder(mask):
  """Return a decoder for a basic varint value (does not include tag).

  Decoded values will be bitwise-anded with the given mask before being
  returned, e.g. to limit them to 32 bits.  The returned decoder does not
  take the usual "end" parameter -- the caller is expected to do bounds checking
  after the fact (often the caller can defer such checking until later).  The
  decoder returns a (value, new_pos) pair.

  Raises (from the returned decoder):
    IndexError: The buffer ended before the varint's final byte.
    _DecodeError: No terminating byte was seen within 64 bits of payload.
  """

  local_ord = ord
  def DecodeVarint(buffer, pos):
    result = 0
    shift = 0
    while 1:
      b = local_ord(buffer[pos])
      # The low seven bits are payload, least-significant group first.
      result |= ((b & 0x7f) << shift)
      pos += 1
      if not (b & 0x80):
        # High bit clear: this was the last byte of the varint.
        result &= mask
        return (result, pos)
      shift += 7
      if shift >= 64:
        raise _DecodeError('Too many bytes when decoding varint.')
  return DecodeVarint


def _SignedVarintDecoder(mask):
  """Like _VarintDecoder() but decodes signed values.

  A decoded value above 0x7fffffffffffffff is reinterpreted as a negative
  number by subtracting 1 << 64, and all bits outside `mask` are then set so
  the result stays negative.  Any other value is simply anded with `mask`.
  The returned decoder yields a (value, new_pos) pair.
  """

  local_ord = ord
  def DecodeVarint(buffer, pos):
    result = 0
    shift = 0
    while 1:
      b = local_ord(buffer[pos])
      result |= ((b & 0x7f) << shift)
      pos += 1
      if not (b & 0x80):
        if result > 0x7fffffffffffffff:
          # Bit 63 is set: convert from unsigned to the negative equivalent.
          result -= (1 << 64)
          result |= ~mask
        else:
          result &= mask
        return (result, pos)
      shift += 7
      if shift >= 64:
        raise _DecodeError('Too many bytes when decoding varint.')
  return DecodeVarint


_DecodeVarint = _VarintDecoder((1 << 64) - 1)
_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1)

# Use these versions for values which must be limited to 32 bits.
_DecodeVarint32 = _VarintDecoder((1 << 32) - 1)
_DecodeSignedVarint32 = _SignedVarintDecoder((1 << 32) - 1)


def ReadTag(buffer, pos):
  """Read a tag from the buffer, and return a (tag_bytes, new_pos) tuple.

  We return the raw bytes of the tag rather than decoding them.  The raw
  bytes can then be used to look up the proper decoder.  This effectively allows
  us to trade some work that would be done in pure-python (decoding a varint)
  for work that is done in C (searching for a byte string in a hash table).
  In a low-level language it would be much cheaper to decode the varint and
  use that, but not in Python.
  """

  start = pos
  # Advance over every continuation byte (high bit set) ...
  while ord(buffer[pos]) & 0x80:
    pos += 1
  # ... and then over the final byte of the varint.
  pos += 1
  return (buffer[start:pos], pos)


# --------------------------------------------------------------------


def _SimpleDecoder(wire_type, decode_value):
  """Return a constructor for a decoder for fields of a particular type.

  Args:
      wire_type:  The field's wire type.
      decode_value:  A function which decodes an individual value, e.g.
        _DecodeVarint()

  Returns:
      A decoder constructor taking (field_number, is_repeated, is_packed, key,
      new_default).  Depending on is_packed / is_repeated it returns a packed,
      repeated or singular field decoder.
  """

  def SpecificDecoder(field_number, is_repeated, is_packed, key, new_default):
    if is_packed:
      local_DecodeVarint = _DecodeVarint
      def DecodePackedField(buffer, pos, end, message, field_dict):
        value = field_dict.get(key)
        if value is None:
          value = field_dict.setdefault(key, new_default(message))
        # A packed field is a varint byte length followed by the elements.
        (endpoint, pos) = local_DecodeVarint(buffer, pos)
        endpoint += pos
        if endpoint > end:
          raise _DecodeError('Truncated message.')
        while pos < endpoint:
          (element, pos) = decode_value(buffer, pos)
          value.append(element)
        if pos > endpoint:
          del value[-1]   # Discard corrupt value.
          raise _DecodeError('Packed element was truncated.')
        return pos
      return DecodePackedField
    elif is_repeated:
      tag_bytes = encoder.TagBytes(field_number, wire_type)
      tag_len = len(tag_bytes)
      def DecodeRepeatedField(buffer, pos, end, message, field_dict):
        value = field_dict.get(key)
        if value is None:
          value = field_dict.setdefault(key, new_default(message))
        while 1:
          (element, new_pos) = decode_value(buffer, pos)
          value.append(element)
          # Predict that the next tag is another copy of the same repeated
          # field.
          pos = new_pos + tag_len
          if buffer[new_pos:pos] != tag_bytes or new_pos >= end:
            # Prediction failed.  Return.
            if new_pos > end:
              raise _DecodeError('Truncated message.')
            return new_pos
      return DecodeRepeatedField
    else:
      def DecodeField(buffer, pos, end, message, field_dict):
        (field_dict[key], pos) = decode_value(buffer, pos)
        if pos > end:
          del field_dict[key]  # Discard corrupt value.
          raise _DecodeError('Truncated message.')
        return pos
      return DecodeField

  return SpecificDecoder


def _ModifiedDecoder(wire_type, decode_value, modify_value):
  """Like SimpleDecoder but additionally invokes modify_value on every value
  before storing it.  Usually modify_value is ZigZagDecode.
  """

  # Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
  # not enough to make a significant difference.

  def InnerDecode(buffer, pos):
    (result, new_pos) = decode_value(buffer, pos)
    return (modify_value(result), new_pos)
  return _SimpleDecoder(wire_type, InnerDecode)


def _StructPackDecoder(wire_type, format):
  """Return a constructor for a decoder for a fixed-width field.

  Args:
      wire_type:  The field's wire type.
      format:  The format string to pass to struct.unpack().
  """

  value_size = struct.calcsize(format)
  local_unpack = struct.unpack

  # Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
  # not enough to make a significant difference.

  # Note that we expect someone up-stack to catch struct.error and convert
  # it to _DecodeError -- this way we don't have to set up exception-
  # handling blocks every time we parse one value.

  def InnerDecode(buffer, pos):
    new_pos = pos + value_size
    result = local_unpack(format, buffer[pos:new_pos])[0]
    return (result, new_pos)
  return _SimpleDecoder(wire_type, InnerDecode)


def _FloatDecoder():
  """Returns a decoder for a float field.

  This code works around a bug in struct.unpack for non-finite 32-bit
  floating-point values.
  """

  local_unpack = struct.unpack

  def InnerDecode(buffer, pos):
    # We expect a 32-bit value in little-endian byte order.  Bit 1 is the sign
    # bit, bits 2-9 represent the exponent, and bits 10-32 are the significand.
    new_pos = pos + 4
    float_bytes = buffer[pos:new_pos]

    # If this value has all its exponent bits set, then it's non-finite.
    # In Python 2.4, struct.unpack will convert it to a finite 64-bit value.
    # To avoid that, we parse it specially.
    if ((float_bytes[3] in '\x7F\xFF')
        and (float_bytes[2] >= '\x80')):
      # If at least one significand bit is set...
      if float_bytes[0:3] != '\x00\x00\x80':
        return (_NAN, new_pos)
      # If sign bit is set...
      if float_bytes[3] == '\xFF':
        return (_NEG_INF, new_pos)
      return (_POS_INF, new_pos)

    # Note that we expect someone up-stack to catch struct.error and convert
    # it to _DecodeError -- this way we don't have to set up exception-
    # handling blocks every time we parse one value.
    result = local_unpack('<f', float_bytes)[0]
    return (result, new_pos)
  return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, InnerDecode)


def _DoubleDecoder():
  """Returns a decoder for a double field.

  This code works around a bug in struct.unpack for not-a-number.
  """

  local_unpack = struct.unpack

  def InnerDecode(buffer, pos):
    # We expect a 64-bit value in little-endian byte order.  Bit 1 is the sign
    # bit, bits 2-12 represent the exponent, and bits 13-64 are the significand.
    new_pos = pos + 8
    double_bytes = buffer[pos:new_pos]

    # If this value has all its exponent bits set and at least one significand
    # bit set, it's not a number.  In Python 2.4, struct.unpack will treat it
    # as inf or -inf.  To avoid that, we treat it specially.
    if ((double_bytes[7] in '\x7F\xFF')
        and (double_bytes[6] >= '\xF0')
        and (double_bytes[0:7] != '\x00\x00\x00\x00\x00\x00\xF0')):
      return (_NAN, new_pos)

    # Note that we expect someone up-stack to catch struct.error and convert
    # it to _DecodeError -- this way we don't have to set up exception-
    # handling blocks every time we parse one value.
    result = local_unpack('<d', double_bytes)[0]
    return (result, new_pos)
  return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, InnerDecode)


# --------------------------------------------------------------------


Int32Decoder = EnumDecoder = _SimpleDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeSignedVarint32)

Int64Decoder = _SimpleDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeSignedVarint)

UInt32Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint32)
UInt64Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint)

SInt32Decoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint32, wire_format.ZigZagDecode)
SInt64Decoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint, wire_format.ZigZagDecode)

# Note that Python conveniently guarantees that when using the '<' prefix on
# formats, they will also have the same size across all platforms (as opposed
# to without the prefix, where their sizes depend on the C compiler's basic
# type sizes).
Fixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I')
Fixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q')
SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i')
SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q')
FloatDecoder = _FloatDecoder()
DoubleDecoder = _DoubleDecoder()

BoolDecoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint, bool)


def StringDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a string field.

  Each value is a varint length followed by that many bytes, which are
  converted with unicode(..., 'utf-8').  Packed encoding is not supported
  (asserted).  NOTE: relies on the Python 2 `unicode` built-in.
  """

  local_DecodeVarint = _DecodeVarint
  local_unicode = unicode

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_LENGTH_DELIMITED)
    tag_len = len(tag_bytes)
    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        (size, pos) = local_DecodeVarint(buffer, pos)
        new_pos = pos + size
        if new_pos > end:
          raise _DecodeError('Truncated string.')
        value.append(local_unicode(buffer[pos:new_pos], 'utf-8'))
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      (size, pos) = local_DecodeVarint(buffer, pos)
      new_pos = pos + size
      if new_pos > end:
        raise _DecodeError('Truncated string.')
      field_dict[key] = local_unicode(buffer[pos:new_pos], 'utf-8')
      return new_pos
    return DecodeField


def BytesDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a bytes field.

  Each value is a varint length followed by that many bytes, stored as the
  raw slice of the buffer.  Packed encoding is not supported (asserted).
  """

  local_DecodeVarint = _DecodeVarint

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_LENGTH_DELIMITED)
    tag_len = len(tag_bytes)
    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        (size, pos) = local_DecodeVarint(buffer, pos)
        new_pos = pos + size
        if new_pos > end:
          raise _DecodeError('Truncated string.')
        value.append(buffer[pos:new_pos])
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      (size, pos) = local_DecodeVarint(buffer, pos)
      new_pos = pos + size
      if new_pos > end:
        raise _DecodeError('Truncated string.')
      field_dict[key] = buffer[pos:new_pos]
      return new_pos
    return DecodeField


def GroupDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a group field.

  The group body is parsed by the sub-message's _InternalParse(); the bytes
  that follow it must be this field's END_GROUP tag, otherwise _DecodeError
  is raised.  Packed encoding is not supported (asserted).
  """

  end_tag_bytes = encoder.TagBytes(field_number,
                                   wire_format.WIRETYPE_END_GROUP)
  end_tag_len = len(end_tag_bytes)

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_START_GROUP)
    tag_len = len(tag_bytes)
    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      # The container is looked up once; it does not change between elements.
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Read sub-message.
        pos = value.add()._InternalParse(buffer, pos, end)
        # Read end tag.
        new_pos = pos+end_tag_len
        if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
          raise _DecodeError('Missing group end tag.')
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      # Read sub-message.
      pos = value._InternalParse(buffer, pos, end)
      # Read end tag.
      new_pos = pos+end_tag_len
      if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
        raise _DecodeError('Missing group end tag.')
      return new_pos
    return DecodeField


def MessageDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a message field.

  Each value is a varint length followed by the encoded sub-message, which is
  parsed by the sub-message's _InternalParse() bounded to that length.
  Packed encoding is not supported (asserted).
  """

  local_DecodeVarint = _DecodeVarint

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_LENGTH_DELIMITED)
    tag_len = len(tag_bytes)
    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      # The container is looked up once; it does not change between elements.
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Read length.
        (size, pos) = local_DecodeVarint(buffer, pos)
        new_pos = pos + size
        if new_pos > end:
          raise _DecodeError('Truncated message.')
        # Read sub-message.
        if value.add()._InternalParse(buffer, pos, new_pos) != new_pos:
          # The only reason _InternalParse would return early is if it
          # encountered an end-group tag.
          raise _DecodeError('Unexpected end-group tag.')
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      # Read length.
      (size, pos) = local_DecodeVarint(buffer, pos)
      new_pos = pos + size
      if new_pos > end:
        raise _DecodeError('Truncated message.')
      # Read sub-message.
      if value._InternalParse(buffer, pos, new_pos) != new_pos:
        # The only reason _InternalParse would return early is if it encountered
        # an end-group tag.
        raise _DecodeError('Unexpected end-group tag.')
      return new_pos
    return DecodeField


# --------------------------------------------------------------------

MESSAGE_SET_ITEM_TAG = encoder.TagBytes(1, wire_format.WIRETYPE_START_GROUP)

def MessageSetItemDecoder(extensions_by_number):
  """Returns a decoder for a MessageSet item.

  The parameter is the _extensions_by_number map for the message class.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }

  If type_id maps to a known extension, the payload is parsed into that
  extension's message; otherwise the raw item bytes are appended to
  message._unknown_fields.
  """

  type_id_tag_bytes = encoder.TagBytes(2, wire_format.WIRETYPE_VARINT)
  message_tag_bytes = encoder.TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)
  item_end_tag_bytes = encoder.TagBytes(1, wire_format.WIRETYPE_END_GROUP)

  local_ReadTag = ReadTag
  local_DecodeVarint = _DecodeVarint
  local_SkipField = SkipField

  def DecodeItem(buffer, pos, end, message, field_dict):
    message_set_item_start = pos
    # -1 marks "not seen yet" for each of the required parts of the item.
    type_id = -1
    message_start = -1
    message_end = -1

    # Technically, type_id and message can appear in any order, so we need
    # a little loop here.
    while 1:
      (tag_bytes, pos) = local_ReadTag(buffer, pos)
      if tag_bytes == type_id_tag_bytes:
        (type_id, pos) = local_DecodeVarint(buffer, pos)
      elif tag_bytes == message_tag_bytes:
        # Only record where the payload lives; it is parsed after the loop,
        # once type_id is known.
        (size, message_start) = local_DecodeVarint(buffer, pos)
        pos = message_end = message_start + size
      elif tag_bytes == item_end_tag_bytes:
        break
      else:
        pos = local_SkipField(buffer, pos, end, tag_bytes)
        if pos == -1:
          raise _DecodeError('Missing group end tag.')

    if pos > end:
      raise _DecodeError('Truncated message.')

    if type_id == -1:
      raise _DecodeError('MessageSet item missing type_id.')
    if message_start == -1:
      raise _DecodeError('MessageSet item missing message.')

    extension = extensions_by_number.get(type_id)
    if extension is not None:
      value = field_dict.get(extension)
      if value is None:
        value = field_dict.setdefault(
            extension, extension.message_type._concrete_class())
      parsed_end = value._InternalParse(buffer, message_start, message_end)
      if parsed_end != message_end:
        # The only reason _InternalParse would return early is if it encountered
        # an end-group tag.
        raise _DecodeError('Unexpected end-group tag.')
    else:
      if not message._unknown_fields:
        message._unknown_fields = []
      message._unknown_fields.append((MESSAGE_SET_ITEM_TAG,
                                      buffer[message_set_item_start:pos]))

    return pos

  return DecodeItem

# --------------------------------------------------------------------
# Optimization is not as heavy here because calls to SkipField() are rare,
# except for handling end-group tags.

def _SkipVarint(buffer, pos, end):
  """Skip a varint value.  Returns the new position."""

  while ord(buffer[pos]) & 0x80:
    pos += 1
  pos += 1
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos

def _SkipFixed64(buffer, pos, end):
  """Skip a fixed64 value.  Returns the new position."""

  pos += 8
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos

def _SkipLengthDelimited(buffer, pos, end):
  """Skip a length-delimited value.  Returns the new position."""

  (size, pos) = _DecodeVarint(buffer, pos)
  pos += size
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos

def _SkipGroup(buffer, pos, end):
  """Skip sub-group.  Returns the new position."""

  while 1:
    (tag_bytes, pos) = ReadTag(buffer, pos)
    new_pos = SkipField(buffer, pos, end, tag_bytes)
    if new_pos == -1:
      # An end-group tag was read; pos already points just past it.
      return pos
    pos = new_pos

def _EndGroup(buffer, pos, end):
  """Skipping an END_GROUP tag returns -1 to tell the parent loop to break."""

  return -1

def _SkipFixed32(buffer, pos, end):
  """Skip a fixed32 value.  Returns the new position."""

  pos += 4
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos

def _RaiseInvalidWireType(buffer, pos, end):
  """Skip function for unknown wire types.  Raises an exception."""

  raise _DecodeError('Tag had invalid wire type.')

def _FieldSkipper():
  """Constructs the SkipField function."""

  # Indexed by the wire type extracted from the tag's first byte; the mask
  # used below is wire_format.TAG_TYPE_MASK.
  WIRETYPE_TO_SKIPPER = [
      _SkipVarint,
      _SkipFixed64,
      _SkipLengthDelimited,
      _SkipGroup,
      _EndGroup,
      _SkipFixed32,
      _RaiseInvalidWireType,
      _RaiseInvalidWireType,
      ]

  wiretype_mask = wire_format.TAG_TYPE_MASK
  local_ord = ord

  def SkipField(buffer, pos, end, tag_bytes):
    """Skips a field with the specified tag.

    |pos| should point to the byte immediately after the tag.

    Returns:
        The new position (after the tag value), or -1 if the tag is an end-group
        tag (in which case the calling loop should break).
    """

    # The wire type is always in the first byte since varints are little-endian.
    wire_type = local_ord(tag_bytes[0]) & wiretype_mask
    return WIRETYPE_TO_SKIPPER[wire_type](buffer, pos, end)

  return SkipField

SkipField = _FieldSkipper()