// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.protobuf;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.List;

/**
 * The classes contained within are used internally by the Protocol Buffer
 * library and generated message implementations. They are public only because
 * those generated messages do not reside in the {@code protobuf} package.
 * Others should not use this class directly.
 *
 * @author kenton@google.com (Kenton Varda)
 */
public class Internal {

  // Every Java platform is required to provide these two charsets, so they
  // are resolved once here instead of being looked up by name (with an
  // unreachable UnsupportedEncodingException handler) on every call.
  private static final Charset UTF_8 = Charset.forName("UTF-8");
  private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

  /**
   * Helper called by generated code to construct default values for string
   * fields.
   * <p>
   * The protocol compiler does not actually contain a UTF-8 decoder -- it
   * just pushes UTF-8-encoded text around without touching it.  The one place
   * where this presents a problem is when generating Java string literals.
   * Unicode characters in the string literal would normally need to be encoded
   * using a Unicode escape sequence, which would require decoding them.
   * To get around this, protoc instead embeds the UTF-8 bytes into the
   * generated code and leaves it to the runtime library to decode them.
   * <p>
   * It gets worse, though.  If protoc just generated a byte array, like:
   *   new byte[] {0x12, 0x34, 0x56, 0x78}
   * Java actually generates *code* which allocates an array and then fills
   * in each value.  This is much less efficient than just embedding the bytes
   * directly into the bytecode.  To get around this, we need another
   * work-around.  String literals are embedded directly, so protoc actually
   * generates a string literal corresponding to the bytes.  The easiest way
   * to do this is to use the ISO-8859-1 character set, which corresponds to
   * the first 256 characters of the Unicode range.  Protoc can then use
   * good old CEscape to generate the string.
   * <p>
   * So we have a string literal which represents a set of bytes which
   * represents another string.  This function -- stringDefaultValue --
   * converts from the generated string to the string we actually want.  The
   * generated code calls this automatically.
   *
   * @param bytes string literal whose characters each stand for one byte
   *     (ISO-8859-1) of the UTF-8 encoding of the desired default value
   * @return the result of decoding those bytes as UTF-8
   */
  public static String stringDefaultValue(String bytes) {
    return new String(bytes.getBytes(ISO_8859_1), UTF_8);
  }

  /**
   * Helper called by generated code to construct default values for bytes
   * fields.
   * <p>
   * This is a lot like {@link #stringDefaultValue}, but for bytes fields.
   * In this case we only need the second of the two hacks -- allowing us to
   * embed raw bytes as a string literal with ISO-8859-1 encoding.
   *
   * @param bytes string literal whose characters each stand for one byte
   * @return a {@link ByteString} copied from the ISO-8859-1 bytes of
   *     {@code bytes}
   */
  public static ByteString bytesDefaultValue(String bytes) {
    return ByteString.copyFrom(bytes.getBytes(ISO_8859_1));
  }

  /**
   * Helper called by generated code to construct default values for bytes
   * fields.
   * <p>
   * This is like {@link #bytesDefaultValue}, but returns a byte array.
   *
   * @param bytes string literal whose characters each stand for one byte
   * @return the ISO-8859-1 bytes of {@code bytes}
   */
  public static byte[] byteArrayDefaultValue(String bytes) {
    return bytes.getBytes(ISO_8859_1);
  }

  /**
   * Helper called by generated code to construct default values for bytes
   * fields.
   * <p>
   * This is like {@link #bytesDefaultValue}, but returns a ByteBuffer.
   *
   * @param bytes string literal whose characters each stand for one byte
   * @return a ByteBuffer wrapping the array produced by
   *     {@link #byteArrayDefaultValue}
   */
  public static ByteBuffer byteBufferDefaultValue(String bytes) {
    return ByteBuffer.wrap(byteArrayDefaultValue(bytes));
  }

  /**
   * Create a new ByteBuffer and copy all the content of {@code source}
   * ByteBuffer to the new ByteBuffer. The new ByteBuffer's limit and
   * capacity will be source.capacity(), and its position will be 0.
   * Note that the state of {@code source} ByteBuffer won't be changed.
   *
   * @param source the buffer to copy
   * @return a newly allocated buffer holding a copy of the whole content
   */
  public static ByteBuffer copyByteBuffer(ByteBuffer source) {
    // Make a duplicate of the source ByteBuffer and read data from the
    // duplicate. This is to avoid affecting the source ByteBuffer's state.
    ByteBuffer temp = source.duplicate();
    // We want to copy all the data in the source ByteBuffer, not just the
    // remaining bytes.
    temp.clear();
    ByteBuffer result = ByteBuffer.allocate(temp.capacity());
    result.put(temp);
    // Reset position to 0 and limit to capacity after the bulk put.
    result.clear();
    return result;
  }

  /**
   * Helper called by generated code to determine if a byte array is a valid
   * UTF-8 encoded string such that the original bytes can be converted to
   * a String object and then back to a byte array round tripping the bytes
   * without loss.  More precisely, returns {@code true} whenever:
   * <pre>   {@code
   * Arrays.equals(byteString.toByteArray(),
   *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
   * }</pre>
   *
   * <p>This method rejects "overlong" byte sequences, as well as
   * 3-byte sequences that would map to a surrogate character, in
   * accordance with the restricted definition of UTF-8 introduced in
   * Unicode 3.1.  Note that the UTF-8 decoder included in Oracle's
   * JDK has been modified to also reject "overlong" byte sequences,
   * but currently (2011) still accepts 3-byte surrogate character
   * byte sequences.
   *
   * <p>See the Unicode Standard,<br>
   * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
   * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
   *
   * <p>As of 2011-02, this method simply returns the result of {@link
   * ByteString#isValidUtf8()}.  Calling that method directly is preferred.
   *
   * @param byteString the string to check
   * @return whether the byte array is round trippable
   */
  public static boolean isValidUtf8(ByteString byteString) {
    return byteString.isValidUtf8();
  }

  /**
   * Like {@link #isValidUtf8(ByteString)} but for byte arrays.
   *
   * @param byteArray the bytes to check
   * @return the result of {@code Utf8.isValidUtf8(byteArray)}
   */
  public static boolean isValidUtf8(byte[] byteArray) {
    return Utf8.isValidUtf8(byteArray);
  }

  /**
   * Helper method to get the UTF-8 bytes of a string.
   *
   * @param value the string to encode
   * @return the UTF-8 encoding of {@code value}
   */
  public static byte[] toByteArray(String value) {
    return value.getBytes(UTF_8);
  }

  /**
   * Helper method to convert a byte array to a string using UTF-8 encoding.
   *
   * @param bytes the bytes to decode
   * @return the string obtained by decoding {@code bytes} as UTF-8
   */
  public static String toStringUtf8(byte[] bytes) {
    return new String(bytes, UTF_8);
  }

  /**
   * Interface for an enum value or value descriptor, to be used in FieldSet.
   * The lite library stores enum values directly in FieldSets but the full
   * library stores EnumValueDescriptors in order to better support reflection.
   */
  public interface EnumLite {
    int getNumber();
  }

  /**
   * Interface for an object which maps integers to {@link EnumLite}s.
   * {@link Descriptors.EnumDescriptor} implements this interface by mapping
   * numbers to {@link Descriptors.EnumValueDescriptor}s.  Additionally,
   * every generated enum type has a static method internalGetValueMap() which
   * returns an implementation of this type that maps numbers to enum values.
   */
  public interface EnumLiteMap<T extends EnumLite> {
    T findValueByNumber(int number);
  }

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for longs.
   * Folds the upper 32 bits into the lower 32 bits with XOR.
   *
   * @see Long#hashCode()
   */
  public static int hashLong(long n) {
    return (int) (n ^ (n >>> 32));
  }

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for
   * booleans. Returns 1231 for {@code true} and 1237 for {@code false}.
   *
   * @see Boolean#hashCode()
   */
  public static int hashBoolean(boolean b) {
    return b ? 1231 : 1237;
  }

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for enums.
   * <p>
   * This is needed because {@link java.lang.Enum#hashCode()} is final, but we
   * need to use the field number as the hash code to ensure compatibility
   * between statically and dynamically generated enum objects.
   *
   * @return {@code e.getNumber()}
   */
  public static int hashEnum(EnumLite e) {
    return e.getNumber();
  }

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for
   * enum lists. Combines the per-element {@link #hashEnum} values with the
   * usual {@code 31 * hash + element} recurrence, starting from 1.
   */
  public static int hashEnumList(List<? extends EnumLite> list) {
    int hash = 1;
    for (EnumLite e : list) {
      hash = 31 * hash + hashEnum(e);
    }
    return hash;
  }

  /**
   * Helper method for implementing {@code equals()} for bytes field.
   *
   * @return {@code true} if both lists have the same size and the arrays at
   *     each index have equal content
   */
  public static boolean equals(List<byte[]> a, List<byte[]> b) {
    if (a.size() != b.size()) {
      return false;
    }
    for (int i = 0; i < a.size(); ++i) {
      if (!Arrays.equals(a.get(i), b.get(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for bytes field.
   * Combines the per-array {@link #hashCode(byte[])} values with the usual
   * {@code 31 * hash + element} recurrence, starting from 1.
   */
  public static int hashCode(List<byte[]> list) {
    int hash = 1;
    for (byte[] bytes : list) {
      hash = 31 * hash + hashCode(bytes);
    }
    return hash;
  }

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for bytes field.
   */
  public static int hashCode(byte[] bytes) {
    // The hash code for a byte array should be the same as the hash code for a
    // ByteString with the same content. This is to ensure that the generated
    // hashCode() method will return the same value as the pure reflection
    // based hashCode() method.
    return LiteralByteString.hashCode(bytes);
  }

  /**
   * Helper method for implementing {@code equals()} for bytes
   * field.
   *
   * @return {@code true} if both buffers have the same capacity and the same
   *     content over that whole capacity, regardless of their current
   *     position and limit
   */
  public static boolean equalsByteBuffer(ByteBuffer a, ByteBuffer b) {
    if (a.capacity() != b.capacity()) {
      return false;
    }
    // ByteBuffer.equals() will only compare the remaining bytes, but we want to
    // compare all the content. Working on cleared duplicates leaves the
    // position and limit of the arguments untouched.
    return a.duplicate().clear().equals(b.duplicate().clear());
  }

  /**
   * Helper method for implementing {@code equals()} for bytes
   * field.
   *
   * @return {@code true} if both lists have the same size and the buffers at
   *     each index are equal per {@link #equalsByteBuffer(ByteBuffer, ByteBuffer)}
   */
  public static boolean equalsByteBuffer(
      List<ByteBuffer> a, List<ByteBuffer> b) {
    if (a.size() != b.size()) {
      return false;
    }
    for (int i = 0; i < a.size(); ++i) {
      if (!equalsByteBuffer(a.get(i), b.get(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for bytes
   * field. Combines the per-buffer {@link #hashCodeByteBuffer(ByteBuffer)}
   * values with the usual {@code 31 * hash + element} recurrence, starting
   * from 1.
   */
  public static int hashCodeByteBuffer(List<ByteBuffer> list) {
    int hash = 1;
    for (ByteBuffer bytes : list) {
      hash = 31 * hash + hashCodeByteBuffer(bytes);
    }
    return hash;
  }

  // Upper bound on the size of the scratch array used by
  // hashCodeByteBuffer(ByteBuffer) for buffers without an accessible
  // backing array.
  private static final int DEFAULT_BUFFER_SIZE = 4096;

  /**
   * Helper method for implementing {@link MessageLite#hashCode()} for bytes
   * field.
   * <p>
   * The hash covers the buffer's whole content (index 0 up to its capacity),
   * not just the remaining bytes, and is seeded with the capacity. The
   * position and limit of {@code bytes} are not modified. A computed hash of
   * 0 is reported as 1.
   */
  public static int hashCodeByteBuffer(ByteBuffer bytes) {
    if (bytes.hasArray()) {
      // Fast path: hash the backing array in place.
      int h = LiteralByteString.hashCode(bytes.capacity(), bytes.array(),
          bytes.arrayOffset(), bytes.capacity());
      return h == 0 ? 1 : h;
    } else {
      // Read the data into a temporary byte array before calculating the
      // hash value.
      final int bufferSize = Math.min(bytes.capacity(), DEFAULT_BUFFER_SIZE);
      final byte[] buffer = new byte[bufferSize];
      // Iterate over a cleared duplicate so the caller's buffer state is kept.
      final ByteBuffer duplicated = bytes.duplicate();
      duplicated.clear();
      int h = bytes.capacity();
      while (duplicated.remaining() > 0) {
        final int length = Math.min(duplicated.remaining(), bufferSize);
        duplicated.get(buffer, 0, length);
        // Feed the running hash back in as the seed for the next chunk.
        h = LiteralByteString.hashCode(h, buffer, 0, length);
      }
      return h == 0 ? 1 : h;
    }
  }

  /**
   * An empty byte array constant used in generated code.
   */
  public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];

  /**
   * An empty {@link ByteBuffer} constant used in generated code. It wraps
   * {@link #EMPTY_BYTE_ARRAY}.
   */
  public static final ByteBuffer EMPTY_BYTE_BUFFER =
      ByteBuffer.wrap(EMPTY_BYTE_ARRAY);

}