1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import java.io.ByteArrayInputStream;
34 import java.io.ByteArrayOutputStream;
35 import java.io.IOException;
36 import java.io.InputStream;
37 import java.io.InvalidObjectException;
38 import java.io.ObjectInputStream;
39 import java.io.OutputStream;
40 import java.io.Serializable;
41 import java.io.UnsupportedEncodingException;
42 import java.nio.ByteBuffer;
43 import java.nio.charset.Charset;
44 import java.nio.charset.UnsupportedCharsetException;
45 import java.util.ArrayList;
46 import java.util.Arrays;
47 import java.util.Collection;
48 import java.util.Collections;
49 import java.util.Comparator;
50 import java.util.Iterator;
51 import java.util.List;
52 import java.util.NoSuchElementException;
53 
54 /**
55  * Immutable sequence of bytes. Substring is supported by sharing the reference to the immutable
56  * underlying bytes. Concatenation is likewise supported without copying (long strings) by building
57  * a tree of pieces in {@link RopeByteString}.
58  *
59  * <p>Like {@link String}, the contents of a {@link ByteString} can never be observed to change, not
60  * even in the presence of a data race or incorrect API usage in the client code.
61  *
62  * @author crazybob@google.com Bob Lee
63  * @author kenton@google.com Kenton Varda
64  * @author carlanton@google.com Carl Haverl
65  * @author martinrb@google.com Martin Buchholz
66  */
67 public abstract class ByteString implements Iterable<Byte>, Serializable {
68 
69   /**
70    * When two strings to be concatenated have a combined length shorter than this, we just copy
71    * their bytes on {@link #concat(ByteString)}. The trade-off is copy size versus the overhead of
72    * creating tree nodes in {@link RopeByteString}.
73    */
74   static final int CONCATENATE_BY_COPY_SIZE = 128;
75 
76   /**
77    * When copying an InputStream into a ByteString with .readFrom(), the chunks in the underlying
78    * rope start at 256 bytes, but double each iteration up to 8192 bytes.
79    */
80   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
81 
82   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
83 
84   /** Empty {@code ByteString}. */
85   public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY);
86 
87   /**
88    * An interface to efficiently copy {@code byte[]}.
89    *
90    * <p>One of the noticeable costs of copying a byte[] into a new array using {@code
91    * System.arraycopy} is nullification of a new buffer before the copy. It has been shown the
92    * Hotspot VM is capable to intrisicfy {@code Arrays.copyOfRange} operation to avoid this
93    * expensive nullification and provide substantial performance gain. Unfortunately this does not
94    * hold on Android runtimes and could make the copy slightly slower due to additional code in the
95    * {@code Arrays.copyOfRange}. Thus we provide two different implementation for array copier for
96    * Hotspot and Android runtimes.
97    */
98   private interface ByteArrayCopier {
99     /** Copies the specified range of the specified array into a new array */
copyFrom(byte[] bytes, int offset, int size)100     byte[] copyFrom(byte[] bytes, int offset, int size);
101   }
102 
103   /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */
104   private static final class SystemByteArrayCopier implements ByteArrayCopier {
105     @Override
copyFrom(byte[] bytes, int offset, int size)106     public byte[] copyFrom(byte[] bytes, int offset, int size) {
107       byte[] copy = new byte[size];
108       System.arraycopy(bytes, offset, copy, 0, size);
109       return copy;
110     }
111   }
112 
113   /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */
114   private static final class ArraysByteArrayCopier implements ByteArrayCopier {
115     @Override
copyFrom(byte[] bytes, int offset, int size)116     public byte[] copyFrom(byte[] bytes, int offset, int size) {
117       return Arrays.copyOfRange(bytes, offset, offset + size);
118     }
119   }
120 
121   private static final ByteArrayCopier byteArrayCopier;
122 
123   static {
124     byteArrayCopier =
125         Android.isOnAndroidDevice() ? new SystemByteArrayCopier() : new ArraysByteArrayCopier();
126   }
127 
128   /**
129    * Cached hash value. Intentionally accessed via a data race, which is safe because of the Java
130    * Memory Model's "no out-of-thin-air values" guarantees for ints. A value of 0 implies that the
131    * hash has not been set.
132    */
133   private int hash = 0;
134 
135   // This constructor is here to prevent subclassing outside of this package,
ByteString()136   ByteString() {}
137 
138   /**
139    * Gets the byte at the given index. This method should be used only for random access to
140    * individual bytes. To access bytes sequentially, use the {@link ByteIterator} returned by {@link
141    * #iterator()}, and call {@link #substring(int, int)} first if necessary.
142    *
143    * @param index index of byte
144    * @return the value
145    * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
146    */
byteAt(int index)147   public abstract byte byteAt(int index);
148 
149   /**
150    * Gets the byte at the given index, assumes bounds checking has already been performed.
151    *
152    * @param index index of byte
153    * @return the value
154    * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
155    */
internalByteAt(int index)156   abstract byte internalByteAt(int index);
157 
158   /**
159    * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. To avoid
160    * auto-boxing, you may get the iterator manually and call {@link ByteIterator#nextByte()}.
161    *
162    * @return the iterator
163    */
164   @Override
iterator()165   public ByteIterator iterator() {
166     return new AbstractByteIterator() {
167       private int position = 0;
168       private final int limit = size();
169 
170       @Override
171       public boolean hasNext() {
172         return position < limit;
173       }
174 
175       @Override
176       public byte nextByte() {
177         int currentPos = position;
178         if (currentPos >= limit) {
179           throw new NoSuchElementException();
180         }
181         position = currentPos + 1;
182         return internalByteAt(currentPos);
183       }
184     };
185   }
186 
187   /**
188    * This interface extends {@code Iterator<Byte>}, so that we can return an unboxed {@code byte}.
189    */
190   public interface ByteIterator extends Iterator<Byte> {
191     /**
192      * An alternative to {@link Iterator#next()} that returns an unboxed primitive {@code byte}.
193      *
194      * @return the next {@code byte} in the iteration
195      * @throws NoSuchElementException if the iteration has no more elements
196      */
nextByte()197     byte nextByte();
198   }
199 
200   abstract static class AbstractByteIterator implements ByteIterator {
201     @Override
next()202     public final Byte next() {
203       // Boxing calls Byte.valueOf(byte), which does not instantiate.
204       return nextByte();
205     }
206 
207     @Override
remove()208     public final void remove() {
209       throw new UnsupportedOperationException();
210     }
211   }
212 
213   /**
214    * Gets the number of bytes.
215    *
216    * @return size in bytes
217    */
size()218   public abstract int size();
219 
220   /**
221    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
222    *
223    * @return true if this is zero bytes long
224    */
isEmpty()225   public final boolean isEmpty() {
226     return size() == 0;
227   }
228 
229   // =================================================================
230   // Comparison
231 
232   private static final int UNSIGNED_BYTE_MASK = 0xFF;
233 
234   /**
235    * Returns the value of the given byte as an integer, interpreting the byte as an unsigned value.
236    * That is, returns {@code value + 256} if {@code value} is negative; {@code value} itself
237    * otherwise.
238    *
239    * <p>Note: This code was copied from {@link com.google.common.primitives.UnsignedBytes#toInt}, as
240    * Guava libraries cannot be used in the {@code com.google.protobuf} package.
241    */
toInt(byte value)242   private static int toInt(byte value) {
243     return value & UNSIGNED_BYTE_MASK;
244   }
245 
246   /**
247    * Compares two {@link ByteString}s lexicographically, treating their contents as unsigned byte
248    * values between 0 and 255 (inclusive).
249    *
250    * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it
251    * is interpreted as an unsigned value, {@code 255}.
252    */
253   private static final Comparator<ByteString> UNSIGNED_LEXICOGRAPHICAL_COMPARATOR =
254       new Comparator<ByteString>() {
255         @Override
256         public int compare(ByteString former, ByteString latter) {
257           ByteIterator formerBytes = former.iterator();
258           ByteIterator latterBytes = latter.iterator();
259 
260           while (formerBytes.hasNext() && latterBytes.hasNext()) {
261             // Note: This code was copied from com.google.common.primitives.UnsignedBytes#compare,
262             // as Guava libraries cannot be used in the {@code com.google.protobuf} package.
263             int result =
264                 Integer.compare(toInt(formerBytes.nextByte()), toInt(latterBytes.nextByte()));
265             if (result != 0) {
266               return result;
267             }
268           }
269 
270           return Integer.compare(former.size(), latter.size());
271         }
272       };
273 
274   /**
275    * Returns a {@link Comparator} which compares {@link ByteString}-s lexicographically
276    * as sequences of unsigned bytes (i.e. values between 0 and 255, inclusive).
277    *
278    * <p>For example, {@code (byte) -1} is considered to be greater than {@code (byte) 1} because it
279    * is interpreted as an unsigned value, {@code 255}:
280    *
281    * <ul>
282    *   <li>{@code `-1` -> 0b11111111 (two's complement) -> 255}
283    *   <li>{@code `1` -> 0b00000001 -> 1}
284    * </ul>
285    */
unsignedLexicographicalComparator()286   public static Comparator<ByteString> unsignedLexicographicalComparator() {
287     return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR;
288   }
289 
290   // =================================================================
291   // ByteString -> substring
292 
293   /**
294    * Return the substring from {@code beginIndex}, inclusive, to the end of the string.
295    *
296    * @param beginIndex start at this index
297    * @return substring sharing underlying data
298    * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or {@code beginIndex > size()}.
299    */
substring(int beginIndex)300   public final ByteString substring(int beginIndex) {
301     return substring(beginIndex, size());
302   }
303 
304   /**
305    * Return the substring from {@code beginIndex}, inclusive, to {@code endIndex}, exclusive.
306    *
307    * @param beginIndex start at this index
308    * @param endIndex the last character is the one before this index
309    * @return substring sharing underlying data
310    * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, {@code endIndex > size()}, or
311    *     {@code beginIndex > endIndex}.
312    */
substring(int beginIndex, int endIndex)313   public abstract ByteString substring(int beginIndex, int endIndex);
314 
315   /**
316    * Tests if this bytestring starts with the specified prefix. Similar to {@link
317    * String#startsWith(String)}
318    *
319    * @param prefix the prefix.
320    * @return <code>true</code> if the byte sequence represented by the argument is a prefix of the
321    *     byte sequence represented by this string; <code>false</code> otherwise.
322    */
startsWith(ByteString prefix)323   public final boolean startsWith(ByteString prefix) {
324     return size() >= prefix.size() && substring(0, prefix.size()).equals(prefix);
325   }
326 
327   /**
328    * Tests if this bytestring ends with the specified suffix. Similar to {@link
329    * String#endsWith(String)}
330    *
331    * @param suffix the suffix.
332    * @return <code>true</code> if the byte sequence represented by the argument is a suffix of the
333    *     byte sequence represented by this string; <code>false</code> otherwise.
334    */
endsWith(ByteString suffix)335   public final boolean endsWith(ByteString suffix) {
336     return size() >= suffix.size() && substring(size() - suffix.size()).equals(suffix);
337   }
338 
339   // =================================================================
340   // byte[] -> ByteString
341 
342   /**
343    * Copies the given bytes into a {@code ByteString}.
344    *
345    * @param bytes source array
346    * @param offset offset in source array
347    * @param size number of bytes to copy
348    * @return new {@code ByteString}
349    * @throws IndexOutOfBoundsException if {@code offset} or {@code size} are out of bounds
350    */
copyFrom(byte[] bytes, int offset, int size)351   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
352     checkRange(offset, offset + size, bytes.length);
353     return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size));
354   }
355 
356   /**
357    * Copies the given bytes into a {@code ByteString}.
358    *
359    * @param bytes to copy
360    * @return new {@code ByteString}
361    */
copyFrom(byte[] bytes)362   public static ByteString copyFrom(byte[] bytes) {
363     return copyFrom(bytes, 0, bytes.length);
364   }
365 
366   /** Wraps the given bytes into a {@code ByteString}. Intended for internal only usage. */
wrap(ByteBuffer buffer)367   static ByteString wrap(ByteBuffer buffer) {
368     if (buffer.hasArray()) {
369       final int offset = buffer.arrayOffset();
370       return ByteString.wrap(buffer.array(), offset + buffer.position(), buffer.remaining());
371     } else {
372       return new NioByteString(buffer);
373     }
374   }
375 
376   /**
377    * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a
378    * classload of ByteString before LiteralByteString.
379    */
wrap(byte[] bytes)380   static ByteString wrap(byte[] bytes) {
381     // TODO(dweis): Return EMPTY when bytes are empty to reduce allocations?
382     return new LiteralByteString(bytes);
383   }
384 
385   /**
386    * Wraps the given bytes into a {@code ByteString}. Intended for internal only usage to force a
387    * classload of ByteString before BoundedByteString and LiteralByteString.
388    */
wrap(byte[] bytes, int offset, int length)389   static ByteString wrap(byte[] bytes, int offset, int length) {
390     return new BoundedByteString(bytes, offset, length);
391   }
392 
393   /**
394    * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into a {@code
395    * ByteString}.
396    *
397    * @param bytes source buffer
398    * @param size number of bytes to copy
399    * @return new {@code ByteString}
400    * @throws IndexOutOfBoundsException if {@code size > bytes.remaining()}
401    */
copyFrom(ByteBuffer bytes, int size)402   public static ByteString copyFrom(ByteBuffer bytes, int size) {
403     checkRange(0, size, bytes.remaining());
404     byte[] copy = new byte[size];
405     bytes.get(copy);
406     return new LiteralByteString(copy);
407   }
408 
409   /**
410    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into a {@code ByteString}.
411    *
412    * @param bytes sourceBuffer
413    * @return new {@code ByteString}
414    */
copyFrom(ByteBuffer bytes)415   public static ByteString copyFrom(ByteBuffer bytes) {
416     return copyFrom(bytes, bytes.remaining());
417   }
418 
419   /**
420    * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as
421    * a {@code ByteString}.
422    *
423    * @param text source string
424    * @param charsetName encoding to use
425    * @return new {@code ByteString}
426    * @throws UnsupportedEncodingException if the encoding isn't found
427    */
copyFrom(String text, String charsetName)428   public static ByteString copyFrom(String text, String charsetName)
429       throws UnsupportedEncodingException {
430     return new LiteralByteString(text.getBytes(charsetName));
431   }
432 
433   /**
434    * Encodes {@code text} into a sequence of bytes using the named charset and returns the result as
435    * a {@code ByteString}.
436    *
437    * @param text source string
438    * @param charset encode using this charset
439    * @return new {@code ByteString}
440    */
copyFrom(String text, Charset charset)441   public static ByteString copyFrom(String text, Charset charset) {
442     return new LiteralByteString(text.getBytes(charset));
443   }
444 
445   /**
446    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the result as a {@code
447    * ByteString}.
448    *
449    * @param text source string
450    * @return new {@code ByteString}
451    */
copyFromUtf8(String text)452   public static ByteString copyFromUtf8(String text) {
453     return new LiteralByteString(text.getBytes(Internal.UTF_8));
454   }
455 
456   // =================================================================
457   // InputStream -> ByteString
458 
459   /**
460    * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary
461    * until all bytes are read through to the end of the stream.
462    *
463    * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte
464    * arrays ("chunks") of the stream data. The first chunk is small, with subsequent chunks each
465    * being double the size, up to 8K.
466    *
467    * <p>Each byte read from the input stream will be copied twice to ensure that the resulting
468    * ByteString is truly immutable.
469    *
470    * @param streamToDrain The source stream, which is read completely but not closed.
471    * @return A new {@code ByteString} which is made up of chunks of various sizes, depending on the
472    *     behavior of the underlying stream.
473    * @throws IOException IOException is thrown if there is a problem reading the underlying stream.
474    */
readFrom(InputStream streamToDrain)475   public static ByteString readFrom(InputStream streamToDrain) throws IOException {
476     return readFrom(streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
477   }
478 
479   /**
480    * Completely reads the given stream's bytes into a {@code ByteString}, blocking if necessary
481    * until all bytes are read through to the end of the stream.
482    *
483    * <p><b>Performance notes:</b> The returned {@code ByteString} is an immutable tree of byte
484    * arrays ("chunks") of the stream data. The chunkSize parameter sets the size of these byte
485    * arrays.
486    *
487    * <p>Each byte read from the input stream will be copied twice to ensure that the resulting
488    * ByteString is truly immutable.
489    *
490    * @param streamToDrain The source stream, which is read completely but not closed.
491    * @param chunkSize The size of the chunks in which to read the stream.
492    * @return A new {@code ByteString} which is made up of chunks of the given size.
493    * @throws IOException IOException is thrown if there is a problem reading the underlying stream.
494    */
readFrom(InputStream streamToDrain, int chunkSize)495   public static ByteString readFrom(InputStream streamToDrain, int chunkSize) throws IOException {
496     return readFrom(streamToDrain, chunkSize, chunkSize);
497   }
498 
499   // Helper method that takes the chunk size range as a parameter.
readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)500   public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)
501       throws IOException {
502     Collection<ByteString> results = new ArrayList<ByteString>();
503 
504     // copy the inbound bytes into a list of chunks; the chunk size
505     // grows exponentially to support both short and long streams.
506     int chunkSize = minChunkSize;
507     while (true) {
508       ByteString chunk = readChunk(streamToDrain, chunkSize);
509       if (chunk == null) {
510         break;
511       }
512       results.add(chunk);
513       chunkSize = Math.min(chunkSize * 2, maxChunkSize);
514     }
515 
516     return ByteString.copyFrom(results);
517   }
518 
519   /**
520    * Blocks until a chunk of the given size can be made from the stream, or EOF is reached. Calls
521    * read() repeatedly in case the given stream implementation doesn't completely fill the given
522    * buffer in one read() call.
523    *
524    * @return A chunk of the desired size, or else a chunk as large as was available when end of
525    *     stream was reached. Returns null if the given stream had no more data in it.
526    */
readChunk(InputStream in, final int chunkSize)527   private static ByteString readChunk(InputStream in, final int chunkSize) throws IOException {
528     final byte[] buf = new byte[chunkSize];
529     int bytesRead = 0;
530     while (bytesRead < chunkSize) {
531       final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
532       if (count == -1) {
533         break;
534       }
535       bytesRead += count;
536     }
537 
538     if (bytesRead == 0) {
539       return null;
540     }
541 
542     // Always make a copy since InputStream could steal a reference to buf.
543     return ByteString.copyFrom(buf, 0, bytesRead);
544   }
545 
546   // =================================================================
547   // Multiple ByteStrings -> One ByteString
548 
549   /**
550    * Concatenate the given {@code ByteString} to this one. Short concatenations, of total size
551    * smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are produced by copying the
552    * underlying bytes (as per Rope.java, <a
553    * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
554    * BAP95 </a>. In general, the concatenate involves no copying.
555    *
556    * @param other string to concatenate
557    * @return a new {@code ByteString} instance
558    */
concat(ByteString other)559   public final ByteString concat(ByteString other) {
560     if (Integer.MAX_VALUE - size() < other.size()) {
561       throw new IllegalArgumentException(
562           "ByteString would be too long: " + size() + "+" + other.size());
563     }
564 
565     return RopeByteString.concatenate(this, other);
566   }
567 
568   /**
569    * Concatenates all byte strings in the iterable and returns the result. This is designed to run
570    * in O(list size), not O(total bytes).
571    *
572    * <p>The returned {@code ByteString} is not necessarily a unique object. If the list is empty,
573    * the returned object is the singleton empty {@code ByteString}. If the list has only one
574    * element, that {@code ByteString} will be returned without copying.
575    *
576    * @param byteStrings strings to be concatenated
577    * @return new {@code ByteString}
578    */
copyFrom(Iterable<ByteString> byteStrings)579   public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
580     // Determine the size;
581     final int size;
582     if (!(byteStrings instanceof Collection)) {
583       int tempSize = 0;
584       for (Iterator<ByteString> iter = byteStrings.iterator();
585           iter.hasNext();
586           iter.next(), ++tempSize) {}
587       size = tempSize;
588     } else {
589       size = ((Collection<ByteString>) byteStrings).size();
590     }
591 
592     if (size == 0) {
593       return EMPTY;
594     }
595 
596     return balancedConcat(byteStrings.iterator(), size);
597   }
598 
599   // Internal function used by copyFrom(Iterable<ByteString>).
600   // Create a balanced concatenation of the next "length" elements from the
601   // iterable.
balancedConcat(Iterator<ByteString> iterator, int length)602   private static ByteString balancedConcat(Iterator<ByteString> iterator, int length) {
603     if (length < 1) {
604       throw new IllegalArgumentException(String.format("length (%s) must be >= 1", length));
605     }
606     ByteString result;
607     if (length == 1) {
608       result = iterator.next();
609     } else {
610       int halfLength = length >>> 1;
611       ByteString left = balancedConcat(iterator, halfLength);
612       ByteString right = balancedConcat(iterator, length - halfLength);
613       result = left.concat(right);
614     }
615     return result;
616   }
617 
618   // =================================================================
619   // ByteString -> byte[]
620 
621   /**
622    * Copies bytes into a buffer at the given offset.
623    *
624    * <p>To copy a subset of bytes, you call this method on the return value of {@link
625    * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target, offset)}
626    *
627    * @param target buffer to copy into
628    * @param offset in the target buffer
629    * @throws IndexOutOfBoundsException if the offset is negative or too large
630    */
copyTo(byte[] target, int offset)631   public void copyTo(byte[] target, int offset) {
632     copyTo(target, 0, offset, size());
633   }
634 
635   /**
636    * Copies bytes into a buffer.
637    *
638    * @param target buffer to copy into
639    * @param sourceOffset offset within these bytes
640    * @param targetOffset offset within the target buffer
641    * @param numberToCopy number of bytes to copy
642    * @throws IndexOutOfBoundsException if an offset or size is negative or too large
643    * @deprecated Instead, call {@code byteString.substring(sourceOffset, sourceOffset +
644    *     numberToCopy).copyTo(target, targetOffset)}
645    */
646   @Deprecated
copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)647   public final void copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
648     checkRange(sourceOffset, sourceOffset + numberToCopy, size());
649     checkRange(targetOffset, targetOffset + numberToCopy, target.length);
650     if (numberToCopy > 0) {
651       copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
652     }
653   }
654 
655   /**
656    * Internal (package private) implementation of {@link #copyTo(byte[],int,int,int)}. It assumes
657    * that all error checking has already been performed and that {@code numberToCopy > 0}.
658    */
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)659   protected abstract void copyToInternal(
660       byte[] target, int sourceOffset, int targetOffset, int numberToCopy);
661 
662   /**
663    * Copies bytes into a ByteBuffer.
664    *
665    * <p>To copy a subset of bytes, you call this method on the return value of {@link
666    * #substring(int, int)}. Example: {@code byteString.substring(start, end).copyTo(target)}
667    *
668    * @param target ByteBuffer to copy into.
669    * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
670    * @throws java.nio.BufferOverflowException if the {@code target}'s remaining() space is not large
671    *     enough to hold the data.
672    */
copyTo(ByteBuffer target)673   public abstract void copyTo(ByteBuffer target);
674 
675   /**
676    * Copies bytes to a {@code byte[]}.
677    *
678    * @return copied bytes
679    */
toByteArray()680   public final byte[] toByteArray() {
681     final int size = size();
682     if (size == 0) {
683       return Internal.EMPTY_BYTE_ARRAY;
684     }
685     byte[] result = new byte[size];
686     copyToInternal(result, 0, 0, size);
687     return result;
688   }
689 
690   /**
691    * Writes a copy of the contents of this byte string to the specified output stream argument.
692    *
693    * @param out the output stream to which to write the data.
694    * @throws IOException if an I/O error occurs.
695    */
writeTo(OutputStream out)696   public abstract void writeTo(OutputStream out) throws IOException;
697 
698   /**
699    * Writes a specified part of this byte string to an output stream.
700    *
701    * @param out the output stream to which to write the data.
702    * @param sourceOffset offset within these bytes
703    * @param numberToWrite number of bytes to write
704    * @throws IOException if an I/O error occurs.
705    * @throws IndexOutOfBoundsException if an offset or size is negative or too large
706    */
writeTo(OutputStream out, int sourceOffset, int numberToWrite)707   final void writeTo(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
708     checkRange(sourceOffset, sourceOffset + numberToWrite, size());
709     if (numberToWrite > 0) {
710       writeToInternal(out, sourceOffset, numberToWrite);
711     }
712   }
713 
714   /**
715    * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes all error checking has
716    * already been done.
717    */
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)718   abstract void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)
719       throws IOException;
720 
721   /**
722    * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling this method may
723    * result in multiple operations on the target {@link ByteOutput}.
724    *
725    * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link
726    * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious
727    * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution!
728    *
729    * @param byteOutput the output target to receive the bytes
730    * @throws IOException if an I/O error occurs
731    * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput)
732    */
writeTo(ByteOutput byteOutput)733   abstract void writeTo(ByteOutput byteOutput) throws IOException;
734 
735   /**
736    * This method behaves exactly the same as {@link #writeTo(ByteOutput)} unless the {@link
737    * ByteString} is a rope. For ropes, the leaf nodes are written in reverse order to the {@code
738    * byteOutput}.
739    *
740    * @param byteOutput the output target to receive the bytes
741    * @throws IOException if an I/O error occurs
742    * @see UnsafeByteOperations#unsafeWriteToReverse(ByteString, ByteOutput)
743    */
writeToReverse(ByteOutput byteOutput)744   abstract void writeToReverse(ByteOutput byteOutput) throws IOException;
745 
746   /**
747    * Constructs a read-only {@code java.nio.ByteBuffer} whose content is equal to the contents of
748    * this byte string. The result uses the same backing array as the byte string, if possible.
749    *
750    * @return wrapped bytes
751    */
asReadOnlyByteBuffer()752   public abstract ByteBuffer asReadOnlyByteBuffer();
753 
754   /**
755    * Constructs a list of read-only {@code java.nio.ByteBuffer} objects such that the concatenation
756    * of their contents is equal to the contents of this byte string. The result uses the same
757    * backing arrays as the byte string.
758    *
759    * <p>By returning a list, implementations of this method may be able to avoid copying even when
760    * there are multiple backing arrays.
761    *
762    * @return a list of wrapped bytes
763    */
asReadOnlyByteBufferList()764   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
765 
766   /**
767    * Constructs a new {@code String} by decoding the bytes using the specified charset.
768    *
769    * @param charsetName encode using this charset
770    * @return new string
771    * @throws UnsupportedEncodingException if charset isn't recognized
772    */
toString(String charsetName)773   public final String toString(String charsetName) throws UnsupportedEncodingException {
774     try {
775       return toString(Charset.forName(charsetName));
776     } catch (UnsupportedCharsetException e) {
777       UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
778       exception.initCause(e);
779       throw exception;
780     }
781   }
782 
783   /**
784    * Constructs a new {@code String} by decoding the bytes using the specified charset. Returns the
785    * same empty String if empty.
786    *
787    * @param charset encode using this charset
788    * @return new string
789    */
toString(Charset charset)790   public final String toString(Charset charset) {
791     return size() == 0 ? "" : toStringInternal(charset);
792   }
793 
794   /**
795    * Constructs a new {@code String} by decoding the bytes using the specified charset.
796    *
797    * @param charset encode using this charset
798    * @return new string
799    */
toStringInternal(Charset charset)800   protected abstract String toStringInternal(Charset charset);
801 
802   // =================================================================
803   // UTF-8 decoding
804 
805   /**
806    * Constructs a new {@code String} by decoding the bytes as UTF-8.
807    *
808    * @return new string using UTF-8 encoding
809    */
toStringUtf8()810   public final String toStringUtf8() {
811     return toString(Internal.UTF_8);
812   }
813 
814   /**
815    * Tells whether this {@code ByteString} represents a well-formed UTF-8 byte sequence, such that
816    * the original bytes can be converted to a String object and then round tripped back to bytes
817    * without loss.
818    *
819    * <p>More precisely, returns {@code true} whenever:
820    *
821    * <pre>{@code
822    * Arrays.equals(byteString.toByteArray(),
823    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
824    * }</pre>
825    *
826    * <p>This method returns {@code false} for "overlong" byte sequences, as well as for 3-byte
827    * sequences that would map to a surrogate character, in accordance with the restricted definition
828    * of UTF-8 introduced in Unicode 3.1. Note that the UTF-8 decoder included in Oracle's JDK has
829    * been modified to also reject "overlong" byte sequences, but (as of 2011) still accepts 3-byte
830    * surrogate character byte sequences.
831    *
832    * <p>See the Unicode Standard,<br>
833    * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
834    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
835    *
836    * @return whether the bytes in this {@code ByteString} are a well-formed UTF-8 byte sequence
837    */
isValidUtf8()838   public abstract boolean isValidUtf8();
839 
840   /**
841    * Tells whether the given byte sequence is a well-formed, malformed, or incomplete UTF-8 byte
842    * sequence. This method accepts and returns a partial state result, allowing the bytes for a
843    * complete UTF-8 byte sequence to be composed from multiple {@code ByteString} segments.
844    *
845    * @param state either {@code 0} (if this is the initial decoding operation) or the value returned
846    *     from a call to a partial decoding method for the previous bytes
847    * @param offset offset of the first byte to check
848    * @param length number of bytes to check
849    * @return {@code -1} if the partial byte sequence is definitely malformed, {@code 0} if it is
850    *     well-formed (no additional input needed), or, if the byte sequence is "incomplete", i.e.
851    *     apparently terminated in the middle of a character, an opaque integer "state" value
852    *     containing enough information to decode the character when passed to a subsequent
853    *     invocation of a partial decoding method.
854    */
partialIsValidUtf8(int state, int offset, int length)855   protected abstract int partialIsValidUtf8(int state, int offset, int length);
856 
857   // =================================================================
858   // equals() and hashCode()
859 
860   @Override
861   public abstract boolean equals(Object o);
862 
863   /** Base class for leaf {@link ByteString}s (i.e. non-ropes). */
864   abstract static class LeafByteString extends ByteString {
865     @Override
getTreeDepth()866     protected final int getTreeDepth() {
867       return 0;
868     }
869 
870     @Override
isBalanced()871     protected final boolean isBalanced() {
872       return true;
873     }
874 
875     @Override
writeToReverse(ByteOutput byteOutput)876     void writeToReverse(ByteOutput byteOutput) throws IOException {
877       writeTo(byteOutput);
878     }
879 
880     /**
881      * Check equality of the substring of given length of this object starting at zero with another
882      * {@code ByteString} substring starting at offset.
883      *
884      * @param other what to compare a substring in
885      * @param offset offset into other
886      * @param length number of bytes to compare
887      * @return true for equality of substrings, else false.
888      */
equalsRange(ByteString other, int offset, int length)889     abstract boolean equalsRange(ByteString other, int offset, int length);
890   }
891 
892   /**
893    * Compute the hashCode using the traditional algorithm from {@link ByteString}.
894    *
895    * @return hashCode value
896    */
897   @Override
hashCode()898   public final int hashCode() {
899     int h = hash;
900 
901     if (h == 0) {
902       int size = size();
903       h = partialHash(size, 0, size);
904       if (h == 0) {
905         h = 1;
906       }
907       hash = h;
908     }
909     return h;
910   }
911 
912   // =================================================================
913   // Input stream
914 
915   /**
916    * Creates an {@code InputStream} which can be used to read the bytes.
917    *
918    * <p>The {@link InputStream} returned by this method is guaranteed to be completely non-blocking.
919    * The method {@link InputStream#available()} returns the number of bytes remaining in the stream.
920    * The methods {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)} and
921    * {@link InputStream#skip(long)} will read/skip as many bytes as are available. The method {@link
922    * InputStream#markSupported()} returns {@code true}.
923    *
924    * <p>The methods in the returned {@link InputStream} might <b>not</b> be thread safe.
925    *
926    * @return an input stream that returns the bytes of this byte string.
927    */
newInput()928   public abstract InputStream newInput();
929 
930   /**
931    * Creates a {@link CodedInputStream} which can be used to read the bytes. Using this is often
932    * more efficient than creating a {@link CodedInputStream} that wraps the result of {@link
933    * #newInput()}.
934    *
935    * @return stream based on wrapped data
936    */
newCodedInput()937   public abstract CodedInputStream newCodedInput();
938 
939   // =================================================================
940   // Output stream
941 
942   /**
943    * Creates a new {@link Output} with the given initial capacity. Call {@link
944    * Output#toByteString()} to create the {@code ByteString} instance.
945    *
946    * <p>A {@link ByteString.Output} offers the same functionality as a {@link
947    * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte}
948    * array.
949    *
950    * @param initialCapacity estimate of number of bytes to be written
951    * @return {@code OutputStream} for building a {@code ByteString}
952    */
newOutput(int initialCapacity)953   public static Output newOutput(int initialCapacity) {
954     return new Output(initialCapacity);
955   }
956 
957   /**
958    * Creates a new {@link Output}. Call {@link Output#toByteString()} to create the {@code
959    * ByteString} instance.
960    *
961    * <p>A {@link ByteString.Output} offers the same functionality as a {@link
962    * ByteArrayOutputStream}, except that it returns a {@link ByteString} rather than a {@code byte
963    * array}.
964    *
965    * @return {@code OutputStream} for building a {@code ByteString}
966    */
newOutput()967   public static Output newOutput() {
968     return new Output(CONCATENATE_BY_COPY_SIZE);
969   }
970 
971   /**
972    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to create the {@code
973    * ByteString} instance.
974    */
975   public static final class Output extends OutputStream {
976     // Implementation note.
977     // The public methods of this class must be synchronized.  ByteStrings
978     // are guaranteed to be immutable.  Without some sort of locking, it could
979     // be possible for one thread to call toByteSring(), while another thread
980     // is still modifying the underlying byte array.
981 
982     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
983     // argument passed by user, indicating initial capacity.
984     private final int initialCapacity;
985     // ByteStrings to be concatenated to create the result
986     private final ArrayList<ByteString> flushedBuffers;
987     // Total number of bytes in the ByteStrings of flushedBuffers
988     private int flushedBuffersTotalBytes;
989     // Current buffer to which we are writing
990     private byte[] buffer;
991     // Location in buffer[] to which we write the next byte.
992     private int bufferPos;
993 
994     /**
995      * Creates a new ByteString output stream with the specified initial capacity.
996      *
997      * @param initialCapacity the initial capacity of the output stream.
998      */
Output(int initialCapacity)999     Output(int initialCapacity) {
1000       if (initialCapacity < 0) {
1001         throw new IllegalArgumentException("Buffer size < 0");
1002       }
1003       this.initialCapacity = initialCapacity;
1004       this.flushedBuffers = new ArrayList<ByteString>();
1005       this.buffer = new byte[initialCapacity];
1006     }
1007 
1008     @Override
write(int b)1009     public synchronized void write(int b) {
1010       if (bufferPos == buffer.length) {
1011         flushFullBuffer(1);
1012       }
1013       buffer[bufferPos++] = (byte) b;
1014     }
1015 
1016     @Override
write(byte[] b, int offset, int length)1017     public synchronized void write(byte[] b, int offset, int length) {
1018       if (length <= buffer.length - bufferPos) {
1019         // The bytes can fit into the current buffer.
1020         System.arraycopy(b, offset, buffer, bufferPos, length);
1021         bufferPos += length;
1022       } else {
1023         // Use up the current buffer
1024         int copySize = buffer.length - bufferPos;
1025         System.arraycopy(b, offset, buffer, bufferPos, copySize);
1026         offset += copySize;
1027         length -= copySize;
1028         // Flush the buffer, and get a new buffer at least big enough to cover
1029         // what we still need to output
1030         flushFullBuffer(length);
1031         System.arraycopy(b, offset, buffer, /* count= */ 0, length);
1032         bufferPos = length;
1033       }
1034     }
1035 
1036     /**
1037      * Creates a byte string. Its size is the current size of this output stream and its output has
1038      * been copied to it.
1039      *
1040      * @return the current contents of this output stream, as a byte string.
1041      */
toByteString()1042     public synchronized ByteString toByteString() {
1043       flushLastBuffer();
1044       return ByteString.copyFrom(flushedBuffers);
1045     }
1046 
1047     /** Implement java.util.Arrays.copyOf() for jdk 1.5. */
copyArray(byte[] buffer, int length)1048     private byte[] copyArray(byte[] buffer, int length) {
1049       byte[] result = new byte[length];
1050       System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length));
1051       return result;
1052     }
1053 
1054     /**
1055      * Writes the complete contents of this byte array output stream to the specified output stream
1056      * argument.
1057      *
1058      * @param out the output stream to which to write the data.
1059      * @throws IOException if an I/O error occurs.
1060      */
writeTo(OutputStream out)1061     public void writeTo(OutputStream out) throws IOException {
1062       ByteString[] cachedFlushBuffers;
1063       byte[] cachedBuffer;
1064       int cachedBufferPos;
1065       synchronized (this) {
1066         // Copy the information we need into local variables so as to hold
1067         // the lock for as short a time as possible.
1068         cachedFlushBuffers = flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
1069         cachedBuffer = buffer;
1070         cachedBufferPos = bufferPos;
1071       }
1072       for (ByteString byteString : cachedFlushBuffers) {
1073         byteString.writeTo(out);
1074       }
1075 
1076       out.write(copyArray(cachedBuffer, cachedBufferPos));
1077     }
1078 
1079     /**
1080      * Returns the current size of the output stream.
1081      *
1082      * @return the current size of the output stream
1083      */
size()1084     public synchronized int size() {
1085       return flushedBuffersTotalBytes + bufferPos;
1086     }
1087 
1088     /**
1089      * Resets this stream, so that all currently accumulated output in the output stream is
1090      * discarded. The output stream can be used again, reusing the already allocated buffer space.
1091      */
reset()1092     public synchronized void reset() {
1093       flushedBuffers.clear();
1094       flushedBuffersTotalBytes = 0;
1095       bufferPos = 0;
1096     }
1097 
1098     @Override
toString()1099     public String toString() {
1100       return String.format(
1101           "<ByteString.Output@%s size=%d>",
1102           Integer.toHexString(System.identityHashCode(this)), size());
1103     }
1104 
1105     /**
1106      * Internal function used by writers. The current buffer is full, and the writer needs a new
1107      * buffer whose size is at least the specified minimum size.
1108      */
flushFullBuffer(int minSize)1109     private void flushFullBuffer(int minSize) {
1110       flushedBuffers.add(new LiteralByteString(buffer));
1111       flushedBuffersTotalBytes += buffer.length;
1112       // We want to increase our total capacity by 50%, but as a minimum,
1113       // the new buffer should also at least be >= minSize and
1114       // >= initial Capacity.
1115       int newSize = Math.max(initialCapacity, Math.max(minSize, flushedBuffersTotalBytes >>> 1));
1116       buffer = new byte[newSize];
1117       bufferPos = 0;
1118     }
1119 
1120     /**
1121      * Internal function used by {@link #toByteString()}. The current buffer may or may not be full,
1122      * but it needs to be flushed.
1123      */
flushLastBuffer()1124     private void flushLastBuffer() {
1125       if (bufferPos < buffer.length) {
1126         if (bufferPos > 0) {
1127           byte[] bufferCopy = copyArray(buffer, bufferPos);
1128           flushedBuffers.add(new LiteralByteString(bufferCopy));
1129         }
1130         // We reuse this buffer for further writes.
1131       } else {
1132         // Buffer is completely full.  Huzzah.
1133         flushedBuffers.add(new LiteralByteString(buffer));
1134         // 99% of the time, we're not going to use this OutputStream again.
1135         // We set buffer to an empty byte stream so that we're handling this
1136         // case without wasting space.  In the rare case that more writes
1137         // *do* occur, this empty buffer will be flushed and an appropriately
1138         // sized new buffer will be created.
1139         buffer = EMPTY_BYTE_ARRAY;
1140       }
1141       flushedBuffersTotalBytes += bufferPos;
1142       bufferPos = 0;
1143     }
1144   }
1145 
1146   /**
1147    * Constructs a new {@code ByteString} builder, which allows you to efficiently construct a {@code
1148    * ByteString} by writing to a {@link CodedOutputStream}. Using this is much more efficient than
1149    * calling {@code newOutput()} and wrapping that in a {@code CodedOutputStream}.
1150    *
1151    * <p>This is package-private because it's a somewhat confusing interface. Users can call {@link
1152    * Message#toByteString()} instead of calling this directly.
1153    *
1154    * @param size The target byte size of the {@code ByteString}. You must write exactly this many
1155    *     bytes before building the result.
1156    * @return the builder
1157    */
newCodedBuilder(int size)1158   static CodedBuilder newCodedBuilder(int size) {
1159     return new CodedBuilder(size);
1160   }
1161 
1162   /** See {@link ByteString#newCodedBuilder(int)}. */
1163   static final class CodedBuilder {
1164     private final CodedOutputStream output;
1165     private final byte[] buffer;
1166 
CodedBuilder(int size)1167     private CodedBuilder(int size) {
1168       buffer = new byte[size];
1169       output = CodedOutputStream.newInstance(buffer);
1170     }
1171 
build()1172     public ByteString build() {
1173       output.checkNoSpaceLeft();
1174 
1175       // We can be confident that the CodedOutputStream will not modify the
1176       // underlying bytes anymore because it already wrote all of them.  So,
1177       // no need to make a copy.
1178       return new LiteralByteString(buffer);
1179     }
1180 
getCodedOutput()1181     public CodedOutputStream getCodedOutput() {
1182       return output;
1183     }
1184   }
1185 
1186   // =================================================================
1187   // Methods {@link RopeByteString} needs on instances, which aren't part of the
1188   // public API.
1189 
1190   /**
1191    * Return the depth of the tree representing this {@code ByteString}, if any, whose root is this
1192    * node. If this is a leaf node, return 0.
1193    *
1194    * @return tree depth or zero
1195    */
getTreeDepth()1196   protected abstract int getTreeDepth();
1197 
1198   /**
1199    * Return {@code true} if this ByteString is literal (a leaf node) or a flat-enough tree in the
1200    * sense of {@link RopeByteString}.
1201    *
1202    * @return true if the tree is flat enough
1203    */
isBalanced()1204   protected abstract boolean isBalanced();
1205 
1206   /**
1207    * Return the cached hash code if available.
1208    *
1209    * @return value of cached hash code or 0 if not computed yet
1210    */
peekCachedHashCode()1211   protected final int peekCachedHashCode() {
1212     return hash;
1213   }
1214 
1215   /**
1216    * Compute the hash across the value bytes starting with the given hash, and return the result.
1217    * This is used to compute the hash across strings represented as a set of pieces by allowing the
1218    * hash computation to be continued from piece to piece.
1219    *
1220    * @param h starting hash value
1221    * @param offset offset into this value to start looking at data values
1222    * @param length number of data values to include in the hash computation
1223    * @return ending hash value
1224    */
partialHash(int h, int offset, int length)1225   protected abstract int partialHash(int h, int offset, int length);
1226 
1227   /**
1228    * Checks that the given index falls within the specified array size.
1229    *
1230    * @param index the index position to be tested
1231    * @param size the length of the array
1232    * @throws IndexOutOfBoundsException if the index does not fall within the array.
1233    */
checkIndex(int index, int size)1234   static void checkIndex(int index, int size) {
1235     if ((index | (size - (index + 1))) < 0) {
1236       if (index < 0) {
1237         throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
1238       }
1239       throw new ArrayIndexOutOfBoundsException("Index > length: " + index + ", " + size);
1240     }
1241   }
1242 
1243   /**
1244    * Checks that the given range falls within the bounds of an array
1245    *
1246    * @param startIndex the start index of the range (inclusive)
1247    * @param endIndex the end index of the range (exclusive)
1248    * @param size the size of the array.
1249    * @return the length of the range.
1250    * @throws IndexOutOfBoundsException some or all of the range falls outside of the array.
1251    */
checkRange(int startIndex, int endIndex, int size)1252   static int checkRange(int startIndex, int endIndex, int size) {
1253     final int length = endIndex - startIndex;
1254     if ((startIndex | endIndex | length | (size - endIndex)) < 0) {
1255       if (startIndex < 0) {
1256         throw new IndexOutOfBoundsException("Beginning index: " + startIndex + " < 0");
1257       }
1258       if (endIndex < startIndex) {
1259         throw new IndexOutOfBoundsException(
1260             "Beginning index larger than ending index: " + startIndex + ", " + endIndex);
1261       }
1262       // endIndex >= size
1263       throw new IndexOutOfBoundsException("End index: " + endIndex + " >= " + size);
1264     }
1265     return length;
1266   }
1267 
1268   @Override
toString()1269   public final String toString() {
1270     return String.format(
1271         "<ByteString@%s size=%d>", Integer.toHexString(System.identityHashCode(this)), size());
1272   }
1273 
1274   /**
1275    * This class implements a {@link com.google.protobuf.ByteString} backed by a single array of
1276    * bytes, contiguous in memory. It supports substring by pointing to only a sub-range of the
1277    * underlying byte array, meaning that a substring will reference the full byte-array of the
1278    * string it's made from, exactly as with {@link String}.
1279    *
1280    * @author carlanton@google.com (Carl Haverl)
1281    */
1282   // Keep this class private to avoid deadlocks in classloading across threads as ByteString's
1283   // static initializer loads LiteralByteString and another thread loads LiteralByteString.
1284   private static class LiteralByteString extends ByteString.LeafByteString {
1285     private static final long serialVersionUID = 1L;
1286 
1287     protected final byte[] bytes;
1288 
1289     /**
1290      * Creates a {@code LiteralByteString} backed by the given array, without copying.
1291      *
1292      * @param bytes array to wrap
1293      */
LiteralByteString(byte[] bytes)1294     LiteralByteString(byte[] bytes) {
1295       if (bytes == null) {
1296         throw new NullPointerException();
1297       }
1298       this.bytes = bytes;
1299     }
1300 
1301     @Override
byteAt(int index)1302     public byte byteAt(int index) {
1303       // Unlike most methods in this class, this one is a direct implementation
1304       // ignoring the potential offset because we need to do range-checking in the
1305       // substring case anyway.
1306       return bytes[index];
1307     }
1308 
1309     @Override
internalByteAt(int index)1310     byte internalByteAt(int index) {
1311       return bytes[index];
1312     }
1313 
1314     @Override
size()1315     public int size() {
1316       return bytes.length;
1317     }
1318 
1319     // =================================================================
1320     // ByteString -> substring
1321 
1322     @Override
substring(int beginIndex, int endIndex)1323     public final ByteString substring(int beginIndex, int endIndex) {
1324       final int length = checkRange(beginIndex, endIndex, size());
1325 
1326       if (length == 0) {
1327         return ByteString.EMPTY;
1328       }
1329 
1330       return new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex, length);
1331     }
1332 
1333     // =================================================================
1334     // ByteString -> byte[]
1335 
1336     @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1337     protected void copyToInternal(
1338         byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
1339       // Optimized form, not for subclasses, since we don't call
1340       // getOffsetIntoBytes() or check the 'numberToCopy' parameter.
1341       // TODO(nathanmittler): Is not calling getOffsetIntoBytes really saving that much?
1342       System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy);
1343     }
1344 
1345     @Override
copyTo(ByteBuffer target)1346     public final void copyTo(ByteBuffer target) {
1347       target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes
1348     }
1349 
1350     @Override
asReadOnlyByteBuffer()1351     public final ByteBuffer asReadOnlyByteBuffer() {
1352       return ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size()).asReadOnlyBuffer();
1353     }
1354 
1355     @Override
asReadOnlyByteBufferList()1356     public final List<ByteBuffer> asReadOnlyByteBufferList() {
1357       return Collections.singletonList(asReadOnlyByteBuffer());
1358     }
1359 
1360     @Override
writeTo(OutputStream outputStream)1361     public final void writeTo(OutputStream outputStream) throws IOException {
1362       outputStream.write(toByteArray());
1363     }
1364 
1365     @Override
writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)1366     final void writeToInternal(OutputStream outputStream, int sourceOffset, int numberToWrite)
1367         throws IOException {
1368       outputStream.write(bytes, getOffsetIntoBytes() + sourceOffset, numberToWrite);
1369     }
1370 
1371     @Override
writeTo(ByteOutput output)1372     final void writeTo(ByteOutput output) throws IOException {
1373       output.writeLazy(bytes, getOffsetIntoBytes(), size());
1374     }
1375 
1376     @Override
toStringInternal(Charset charset)1377     protected final String toStringInternal(Charset charset) {
1378       return new String(bytes, getOffsetIntoBytes(), size(), charset);
1379     }
1380 
1381     // =================================================================
1382     // UTF-8 decoding
1383 
1384     @Override
isValidUtf8()1385     public final boolean isValidUtf8() {
1386       int offset = getOffsetIntoBytes();
1387       return Utf8.isValidUtf8(bytes, offset, offset + size());
1388     }
1389 
1390     @Override
partialIsValidUtf8(int state, int offset, int length)1391     protected final int partialIsValidUtf8(int state, int offset, int length) {
1392       int index = getOffsetIntoBytes() + offset;
1393       return Utf8.partialIsValidUtf8(state, bytes, index, index + length);
1394     }
1395 
1396     // =================================================================
1397     // equals() and hashCode()
1398 
1399     @Override
equals(Object other)1400     public final boolean equals(Object other) {
1401       if (other == this) {
1402         return true;
1403       }
1404       if (!(other instanceof ByteString)) {
1405         return false;
1406       }
1407 
1408       if (size() != ((ByteString) other).size()) {
1409         return false;
1410       }
1411       if (size() == 0) {
1412         return true;
1413       }
1414 
1415       if (other instanceof LiteralByteString) {
1416         LiteralByteString otherAsLiteral = (LiteralByteString) other;
1417         // If we know the hash codes and they are not equal, we know the byte
1418         // strings are not equal.
1419         int thisHash = peekCachedHashCode();
1420         int thatHash = otherAsLiteral.peekCachedHashCode();
1421         if (thisHash != 0 && thatHash != 0 && thisHash != thatHash) {
1422           return false;
1423         }
1424 
1425         return equalsRange((LiteralByteString) other, 0, size());
1426       } else {
1427         // RopeByteString and NioByteString.
1428         return other.equals(this);
1429       }
1430     }
1431 
1432     /**
1433      * Check equality of the substring of given length of this object starting at zero with another
1434      * {@code LiteralByteString} substring starting at offset.
1435      *
1436      * @param other what to compare a substring in
1437      * @param offset offset into other
1438      * @param length number of bytes to compare
1439      * @return true for equality of substrings, else false.
1440      */
1441     @Override
equalsRange(ByteString other, int offset, int length)1442     final boolean equalsRange(ByteString other, int offset, int length) {
1443       if (length > other.size()) {
1444         throw new IllegalArgumentException("Length too large: " + length + size());
1445       }
1446       if (offset + length > other.size()) {
1447         throw new IllegalArgumentException(
1448             "Ran off end of other: " + offset + ", " + length + ", " + other.size());
1449       }
1450 
1451       if (other instanceof LiteralByteString) {
1452         LiteralByteString lbsOther = (LiteralByteString) other;
1453         byte[] thisBytes = bytes;
1454         byte[] otherBytes = lbsOther.bytes;
1455         int thisLimit = getOffsetIntoBytes() + length;
1456         for (int thisIndex = getOffsetIntoBytes(),
1457                 otherIndex = lbsOther.getOffsetIntoBytes() + offset;
1458             (thisIndex < thisLimit);
1459             ++thisIndex, ++otherIndex) {
1460           if (thisBytes[thisIndex] != otherBytes[otherIndex]) {
1461             return false;
1462           }
1463         }
1464         return true;
1465       }
1466 
1467       return other.substring(offset, offset + length).equals(substring(0, length));
1468     }
1469 
1470     @Override
partialHash(int h, int offset, int length)1471     protected final int partialHash(int h, int offset, int length) {
1472       return Internal.partialHash(h, bytes, getOffsetIntoBytes() + offset, length);
1473     }
1474 
1475     // =================================================================
1476     // Input stream
1477 
1478     @Override
newInput()1479     public final InputStream newInput() {
1480       return new ByteArrayInputStream(bytes, getOffsetIntoBytes(), size()); // No copy
1481     }
1482 
1483     @Override
newCodedInput()1484     public final CodedInputStream newCodedInput() {
1485       // We trust CodedInputStream not to modify the bytes, or to give anyone
1486       // else access to them.
1487       return CodedInputStream.newInstance(
1488           bytes, getOffsetIntoBytes(), size(), /* bufferIsImmutable= */ true);
1489     }
1490 
1491     // =================================================================
1492     // Internal methods
1493 
1494     /**
1495      * Offset into {@code bytes[]} to use, non-zero for substrings.
1496      *
1497      * @return always 0 for this class
1498      */
getOffsetIntoBytes()1499     protected int getOffsetIntoBytes() {
1500       return 0;
1501     }
1502   }
1503 
1504   /**
1505    * This class is used to represent the substring of a {@link ByteString} over a single byte array.
1506    * In terms of the public API of {@link ByteString}, you end up here by calling {@link
1507    * ByteString#copyFrom(byte[])} followed by {@link ByteString#substring(int, int)}.
1508    *
1509    * <p>This class contains most of the overhead involved in creating a substring from a {@link
1510    * LiteralByteString}. The overhead involves some range-checking and two extra fields.
1511    *
1512    * @author carlanton@google.com (Carl Haverl)
1513    */
1514   // Keep this class private to avoid deadlocks in classloading across threads as ByteString's
1515   // static initializer loads LiteralByteString and another thread loads BoundedByteString.
1516   private static final class BoundedByteString extends LiteralByteString {
1517 
1518     private final int bytesOffset;
1519     private final int bytesLength;
1520 
1521     /**
1522      * Creates a {@code BoundedByteString} backed by the sub-range of given array, without copying.
1523      *
1524      * @param bytes array to wrap
1525      * @param offset index to first byte to use in bytes
1526      * @param length number of bytes to use from bytes
1527      * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0}, or if {@code
1528      *     offset + length > bytes.length}.
1529      */
BoundedByteString(byte[] bytes, int offset, int length)1530     BoundedByteString(byte[] bytes, int offset, int length) {
1531       super(bytes);
1532       checkRange(offset, offset + length, bytes.length);
1533 
1534       this.bytesOffset = offset;
1535       this.bytesLength = length;
1536     }
1537 
1538     /**
1539      * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for
1540      * backwards-compatibility reasons although it would more properly be {@link
1541      * IndexOutOfBoundsException}.
1542      *
1543      * @param index index of byte
1544      * @return the value
1545      * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
1546      */
1547     @Override
byteAt(int index)1548     public byte byteAt(int index) {
1549       // We must check the index ourselves as we cannot rely on Java array index
1550       // checking for substrings.
1551       checkIndex(index, size());
1552       return bytes[bytesOffset + index];
1553     }
1554 
1555     @Override
internalByteAt(int index)1556     byte internalByteAt(int index) {
1557       return bytes[bytesOffset + index];
1558     }
1559 
1560     @Override
size()1561     public int size() {
1562       return bytesLength;
1563     }
1564 
1565     @Override
getOffsetIntoBytes()1566     protected int getOffsetIntoBytes() {
1567       return bytesOffset;
1568     }
1569 
1570     // =================================================================
1571     // ByteString -> byte[]
1572 
1573     @Override
copyToInternal( byte[] target, int sourceOffset, int targetOffset, int numberToCopy)1574     protected void copyToInternal(
1575         byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
1576       System.arraycopy(
1577           bytes, getOffsetIntoBytes() + sourceOffset, target, targetOffset, numberToCopy);
1578     }
1579 
1580     // =================================================================
1581     // Serializable
1582 
1583     private static final long serialVersionUID = 1L;
1584 
writeReplace()1585     Object writeReplace() {
1586       return ByteString.wrap(toByteArray());
1587     }
1588 
readObject(@uppressWarnings"unused") ObjectInputStream in)1589     private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
1590       throw new InvalidObjectException(
1591           "BoundedByteStream instances are not to be serialized directly");
1592     }
1593   }
1594 }
1595