1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import java.io.ByteArrayOutputStream;
34 import java.io.IOException;
35 import java.io.InputStream;
36 import java.io.OutputStream;
37 import java.io.UnsupportedEncodingException;
38 import java.nio.ByteBuffer;
39 import java.util.ArrayList;
40 import java.util.Collection;
41 import java.util.Iterator;
42 import java.util.List;
43 import java.util.NoSuchElementException;
44 
45 /**
46  * Immutable sequence of bytes.  Substring is supported by sharing the reference
47  * to the immutable underlying bytes, as with {@link String}.  Concatenation is
48  * likewise supported without copying (long strings) by building a tree of
49  * pieces in {@link RopeByteString}.
50  * <p>
51  * Like {@link String}, the contents of a {@link ByteString} can never be
52  * observed to change, not even in the presence of a data race or incorrect
53  * API usage in the client code.
54  *
55  * @author crazybob@google.com Bob Lee
56  * @author kenton@google.com Kenton Varda
57  * @author carlanton@google.com Carl Haverl
58  * @author martinrb@google.com Martin Buchholz
59  */
60 public abstract class ByteString implements Iterable<Byte> {
61 
62   /**
63    * When two strings to be concatenated have a combined length shorter than
64    * this, we just copy their bytes on {@link #concat(ByteString)}.
65    * The trade-off is copy size versus the overhead of creating tree nodes
66    * in {@link RopeByteString}.
67    */
68   static final int CONCATENATE_BY_COPY_SIZE = 128;
69 
70   /**
71    * When copying an InputStream into a ByteString with .readFrom(),
72    * the chunks in the underlying rope start at 256 bytes, but double
73    * each iteration up to 8192 bytes.
74    */
75   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100;  // 256b
76   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000;  // 8k
77 
78   /**
79    * Empty {@code ByteString}.
80    */
81   public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
82 
83   // This constructor is here to prevent subclassing outside of this package,
ByteString()84   ByteString() {}
85 
86   /**
87    * Gets the byte at the given index. This method should be used only for
88    * random access to individual bytes. To access bytes sequentially, use the
89    * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
90    * #substring(int, int)} first if necessary.
91    *
92    * @param index index of byte
93    * @return the value
94    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
95    */
byteAt(int index)96   public abstract byte byteAt(int index);
97 
98   /**
99    * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
100    * To avoid auto-boxing, you may get the iterator manually and call
101    * {@link ByteIterator#nextByte()}.
102    *
103    * @return the iterator
104    */
iterator()105   public abstract ByteIterator iterator();
106 
107   /**
108    * This interface extends {@code Iterator<Byte>}, so that we can return an
109    * unboxed {@code byte}.
110    */
111   public interface ByteIterator extends Iterator<Byte> {
112     /**
113      * An alternative to {@link Iterator#next()} that returns an
114      * unboxed primitive {@code byte}.
115      *
116      * @return the next {@code byte} in the iteration
117      * @throws NoSuchElementException if the iteration has no more elements
118      */
nextByte()119     byte nextByte();
120   }
121 
122   /**
123    * Gets the number of bytes.
124    *
125    * @return size in bytes
126    */
size()127   public abstract int size();
128 
129   /**
130    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
131    *
132    * @return true if this is zero bytes long
133    */
isEmpty()134   public boolean isEmpty() {
135     return size() == 0;
136   }
137 
138   // =================================================================
139   // ByteString -> substring
140 
141   /**
142    * Return the substring from {@code beginIndex}, inclusive, to the end of the
143    * string.
144    *
145    * @param beginIndex start at this index
146    * @return substring sharing underlying data
147    * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
148    *     {@code beginIndex > size()}.
149    */
substring(int beginIndex)150   public ByteString substring(int beginIndex) {
151     return substring(beginIndex, size());
152   }
153 
154   /**
155    * Return the substring from {@code beginIndex}, inclusive, to {@code
156    * endIndex}, exclusive.
157    *
158    * @param beginIndex start at this index
159    * @param endIndex   the last character is the one before this index
160    * @return substring sharing underlying data
161    * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
162    *     {@code endIndex > size()}, or {@code beginIndex > endIndex}.
163    */
substring(int beginIndex, int endIndex)164   public abstract ByteString substring(int beginIndex, int endIndex);
165 
166   /**
167    * Tests if this bytestring starts with the specified prefix.
168    * Similar to {@link String#startsWith(String)}
169    *
170    * @param prefix the prefix.
171    * @return <code>true</code> if the byte sequence represented by the
172    *         argument is a prefix of the byte sequence represented by
173    *         this string; <code>false</code> otherwise.
174    */
startsWith(ByteString prefix)175   public boolean startsWith(ByteString prefix) {
176     return size() >= prefix.size() &&
177            substring(0, prefix.size()).equals(prefix);
178   }
179 
180   /**
181    * Tests if this bytestring ends with the specified suffix.
182    * Similar to {@link String#endsWith(String)}
183    *
184    * @param suffix the suffix.
185    * @return <code>true</code> if the byte sequence represented by the
186    *         argument is a suffix of the byte sequence represented by
187    *         this string; <code>false</code> otherwise.
188    */
endsWith(ByteString suffix)189   public boolean endsWith(ByteString suffix) {
190     return size() >= suffix.size() &&
191         substring(size() - suffix.size()).equals(suffix);
192   }
193 
194   // =================================================================
195   // byte[] -> ByteString
196 
197   /**
198    * Copies the given bytes into a {@code ByteString}.
199    *
200    * @param bytes source array
201    * @param offset offset in source array
202    * @param size number of bytes to copy
203    * @return new {@code ByteString}
204    */
copyFrom(byte[] bytes, int offset, int size)205   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
206     byte[] copy = new byte[size];
207     System.arraycopy(bytes, offset, copy, 0, size);
208     return new LiteralByteString(copy);
209   }
210 
211   /**
212    * Copies the given bytes into a {@code ByteString}.
213    *
214    * @param bytes to copy
215    * @return new {@code ByteString}
216    */
copyFrom(byte[] bytes)217   public static ByteString copyFrom(byte[] bytes) {
218     return copyFrom(bytes, 0, bytes.length);
219   }
220 
221   /**
222    * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
223    * a {@code ByteString}.
224    *
225    * @param bytes source buffer
226    * @param size number of bytes to copy
227    * @return new {@code ByteString}
228    */
copyFrom(ByteBuffer bytes, int size)229   public static ByteString copyFrom(ByteBuffer bytes, int size) {
230     byte[] copy = new byte[size];
231     bytes.get(copy);
232     return new LiteralByteString(copy);
233   }
234 
235   /**
236    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
237    * a {@code ByteString}.
238    *
239    * @param bytes sourceBuffer
240    * @return new {@code ByteString}
241    */
copyFrom(ByteBuffer bytes)242   public static ByteString copyFrom(ByteBuffer bytes) {
243     return copyFrom(bytes, bytes.remaining());
244   }
245 
246   /**
247    * Encodes {@code text} into a sequence of bytes using the named charset
248    * and returns the result as a {@code ByteString}.
249    *
250    * @param text source string
251    * @param charsetName encoding to use
252    * @return new {@code ByteString}
253    * @throws UnsupportedEncodingException if the encoding isn't found
254    */
copyFrom(String text, String charsetName)255   public static ByteString copyFrom(String text, String charsetName)
256       throws UnsupportedEncodingException {
257     return new LiteralByteString(text.getBytes(charsetName));
258   }
259 
260   /**
261    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
262    * result as a {@code ByteString}.
263    *
264    * @param text source string
265    * @return new {@code ByteString}
266    */
copyFromUtf8(String text)267   public static ByteString copyFromUtf8(String text) {
268     try {
269       return new LiteralByteString(text.getBytes("UTF-8"));
270     } catch (UnsupportedEncodingException e) {
271       throw new RuntimeException("UTF-8 not supported?", e);
272     }
273   }
274 
275   // =================================================================
276   // InputStream -> ByteString
277 
278   /**
279    * Completely reads the given stream's bytes into a
280    * {@code ByteString}, blocking if necessary until all bytes are
281    * read through to the end of the stream.
282    *
283    * <b>Performance notes:</b> The returned {@code ByteString} is an
284    * immutable tree of byte arrays ("chunks") of the stream data.  The
285    * first chunk is small, with subsequent chunks each being double
286    * the size, up to 8K.  If the caller knows the precise length of
287    * the stream and wishes to avoid all unnecessary copies and
288    * allocations, consider using the two-argument version of this
289    * method, below.
290    *
291    * @param streamToDrain The source stream, which is read completely
292    *     but not closed.
293    * @return A new {@code ByteString} which is made up of chunks of
294    *     various sizes, depending on the behavior of the underlying
295    *     stream.
296    * @throws IOException IOException is thrown if there is a problem
297    *     reading the underlying stream.
298    */
readFrom(InputStream streamToDrain)299   public static ByteString readFrom(InputStream streamToDrain)
300       throws IOException {
301     return readFrom(
302         streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
303   }
304 
305   /**
306    * Completely reads the given stream's bytes into a
307    * {@code ByteString}, blocking if necessary until all bytes are
308    * read through to the end of the stream.
309    *
310    * <b>Performance notes:</b> The returned {@code ByteString} is an
311    * immutable tree of byte arrays ("chunks") of the stream data.  The
312    * chunkSize parameter sets the size of these byte arrays. In
313    * particular, if the chunkSize is precisely the same as the length
314    * of the stream, unnecessary allocations and copies will be
315    * avoided. Otherwise, the chunks will be of the given size, except
316    * for the last chunk, which will be resized (via a reallocation and
317    * copy) to contain the remainder of the stream.
318    *
319    * @param streamToDrain The source stream, which is read completely
320    *     but not closed.
321    * @param chunkSize The size of the chunks in which to read the
322    *     stream.
323    * @return A new {@code ByteString} which is made up of chunks of
324    *     the given size.
325    * @throws IOException IOException is thrown if there is a problem
326    *     reading the underlying stream.
327    */
readFrom(InputStream streamToDrain, int chunkSize)328   public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
329       throws IOException {
330     return readFrom(streamToDrain, chunkSize, chunkSize);
331   }
332 
333   // Helper method that takes the chunk size range as a parameter.
readFrom(InputStream streamToDrain, int minChunkSize, int maxChunkSize)334   public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
335       int maxChunkSize) throws IOException {
336     Collection<ByteString> results = new ArrayList<ByteString>();
337 
338     // copy the inbound bytes into a list of chunks; the chunk size
339     // grows exponentially to support both short and long streams.
340     int chunkSize = minChunkSize;
341     while (true) {
342       ByteString chunk = readChunk(streamToDrain, chunkSize);
343       if (chunk == null) {
344         break;
345       }
346       results.add(chunk);
347       chunkSize = Math.min(chunkSize * 2, maxChunkSize);
348     }
349 
350     return ByteString.copyFrom(results);
351   }
352 
353   /**
354    * Blocks until a chunk of the given size can be made from the
355    * stream, or EOF is reached.  Calls read() repeatedly in case the
356    * given stream implementation doesn't completely fill the given
357    * buffer in one read() call.
358    *
359    * @return A chunk of the desired size, or else a chunk as large as
360    * was available when end of stream was reached. Returns null if the
361    * given stream had no more data in it.
362    */
readChunk(InputStream in, final int chunkSize)363   private static ByteString readChunk(InputStream in, final int chunkSize)
364       throws IOException {
365       final byte[] buf = new byte[chunkSize];
366       int bytesRead = 0;
367       while (bytesRead < chunkSize) {
368         final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
369         if (count == -1) {
370           break;
371         }
372         bytesRead += count;
373       }
374 
375       if (bytesRead == 0) {
376         return null;
377       } else {
378         return ByteString.copyFrom(buf, 0, bytesRead);
379       }
380   }
381 
382   // =================================================================
383   // Multiple ByteStrings -> One ByteString
384 
385   /**
386    * Concatenate the given {@code ByteString} to this one. Short concatenations,
387    * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
388    * produced by copying the underlying bytes (as per Rope.java, <a
389    * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
390    * BAP95 </a>. In general, the concatenate involves no copying.
391    *
392    * @param other string to concatenate
393    * @return a new {@code ByteString} instance
394    */
concat(ByteString other)395   public ByteString concat(ByteString other) {
396     int thisSize = size();
397     int otherSize = other.size();
398     if ((long) thisSize + otherSize >= Integer.MAX_VALUE) {
399       throw new IllegalArgumentException("ByteString would be too long: " +
400                                          thisSize + "+" + otherSize);
401     }
402 
403     return RopeByteString.concatenate(this, other);
404   }
405 
406   /**
407    * Concatenates all byte strings in the iterable and returns the result.
408    * This is designed to run in O(list size), not O(total bytes).
409    *
410    * <p>The returned {@code ByteString} is not necessarily a unique object.
411    * If the list is empty, the returned object is the singleton empty
412    * {@code ByteString}.  If the list has only one element, that
413    * {@code ByteString} will be returned without copying.
414    *
415    * @param byteStrings strings to be concatenated
416    * @return new {@code ByteString}
417    */
copyFrom(Iterable<ByteString> byteStrings)418   public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
419     Collection<ByteString> collection;
420     if (!(byteStrings instanceof Collection)) {
421       collection = new ArrayList<ByteString>();
422       for (ByteString byteString : byteStrings) {
423         collection.add(byteString);
424       }
425     } else {
426       collection = (Collection<ByteString>) byteStrings;
427     }
428     ByteString result;
429     if (collection.isEmpty()) {
430       result = EMPTY;
431     } else {
432       result = balancedConcat(collection.iterator(), collection.size());
433     }
434     return result;
435   }
436 
437   // Internal function used by copyFrom(Iterable<ByteString>).
438   // Create a balanced concatenation of the next "length" elements from the
439   // iterable.
balancedConcat(Iterator<ByteString> iterator, int length)440   private static ByteString balancedConcat(Iterator<ByteString> iterator,
441       int length) {
442     assert length >= 1;
443     ByteString result;
444     if (length == 1) {
445       result = iterator.next();
446     } else {
447       int halfLength = length >>> 1;
448       ByteString left = balancedConcat(iterator, halfLength);
449       ByteString right = balancedConcat(iterator, length - halfLength);
450       result = left.concat(right);
451     }
452     return result;
453   }
454 
455   // =================================================================
456   // ByteString -> byte[]
457 
458   /**
459    * Copies bytes into a buffer at the given offset.
460    *
461    * @param target buffer to copy into
462    * @param offset in the target buffer
463    * @throws IndexOutOfBoundsException if the offset is negative or too large
464    */
copyTo(byte[] target, int offset)465   public void copyTo(byte[] target, int offset) {
466     copyTo(target, 0, offset, size());
467   }
468 
469   /**
470    * Copies bytes into a buffer.
471    *
472    * @param target       buffer to copy into
473    * @param sourceOffset offset within these bytes
474    * @param targetOffset offset within the target buffer
475    * @param numberToCopy number of bytes to copy
476    * @throws IndexOutOfBoundsException if an offset or size is negative or too
477    *     large
478    */
copyTo(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)479   public void copyTo(byte[] target, int sourceOffset, int targetOffset,
480       int numberToCopy) {
481     if (sourceOffset < 0) {
482       throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
483     }
484     if (targetOffset < 0) {
485       throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
486     }
487     if (numberToCopy < 0) {
488       throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
489     }
490     if (sourceOffset + numberToCopy > size()) {
491       throw new IndexOutOfBoundsException(
492           "Source end offset < 0: " + (sourceOffset + numberToCopy));
493     }
494     if (targetOffset + numberToCopy > target.length) {
495       throw new IndexOutOfBoundsException(
496           "Target end offset < 0: " + (targetOffset + numberToCopy));
497     }
498     if (numberToCopy > 0) {
499       copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
500     }
501   }
502 
503   /**
504    * Internal (package private) implementation of
505    * @link{#copyTo(byte[],int,int,int}.
506    * It assumes that all error checking has already been performed and that
507    * @code{numberToCopy > 0}.
508    */
copyToInternal(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)509   protected abstract void copyToInternal(byte[] target, int sourceOffset,
510       int targetOffset, int numberToCopy);
511 
512   /**
513    * Copies bytes into a ByteBuffer.
514    *
515    * @param target ByteBuffer to copy into.
516    * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
517    * @throws java.nio.BufferOverflowException if the {@code target}'s
518    *     remaining() space is not large enough to hold the data.
519    */
copyTo(ByteBuffer target)520   public abstract void copyTo(ByteBuffer target);
521 
522   /**
523    * Copies bytes to a {@code byte[]}.
524    *
525    * @return copied bytes
526    */
toByteArray()527   public byte[] toByteArray() {
528     int size = size();
529     if (size == 0) {
530       return Internal.EMPTY_BYTE_ARRAY;
531     }
532     byte[] result = new byte[size];
533     copyToInternal(result, 0, 0, size);
534     return result;
535   }
536 
537   /**
538    * Writes the complete contents of this byte string to
539    * the specified output stream argument.
540    *
541    * @param  out  the output stream to which to write the data.
542    * @throws IOException  if an I/O error occurs.
543    */
writeTo(OutputStream out)544   public abstract void writeTo(OutputStream out) throws IOException;
545 
546   /**
547    * Writes a specified part of this byte string to an output stream.
548    *
549    * @param  out  the output stream to which to write the data.
550    * @param  sourceOffset offset within these bytes
551    * @param  numberToWrite number of bytes to write
552    * @throws IOException  if an I/O error occurs.
553    * @throws IndexOutOfBoundsException if an offset or size is negative or too
554    *     large
555    */
writeTo(OutputStream out, int sourceOffset, int numberToWrite)556   void writeTo(OutputStream out, int sourceOffset, int numberToWrite)
557       throws IOException {
558     if (sourceOffset < 0) {
559       throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
560     }
561     if (numberToWrite < 0) {
562       throw new IndexOutOfBoundsException("Length < 0: " + numberToWrite);
563     }
564     if (sourceOffset + numberToWrite > size()) {
565       throw new IndexOutOfBoundsException(
566           "Source end offset exceeded: " + (sourceOffset + numberToWrite));
567     }
568     if (numberToWrite > 0) {
569       writeToInternal(out, sourceOffset, numberToWrite);
570     }
571 
572   }
573 
574   /**
575    * Internal version of {@link #writeTo(OutputStream,int,int)} that assumes
576    * all error checking has already been done.
577    */
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)578   abstract void writeToInternal(OutputStream out, int sourceOffset,
579       int numberToWrite) throws IOException;
580 
581   /**
582    * Constructs a read-only {@code java.nio.ByteBuffer} whose content
583    * is equal to the contents of this byte string.
584    * The result uses the same backing array as the byte string, if possible.
585    *
586    * @return wrapped bytes
587    */
asReadOnlyByteBuffer()588   public abstract ByteBuffer asReadOnlyByteBuffer();
589 
590   /**
591    * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
592    * such that the concatenation of their contents is equal to the contents
593    * of this byte string.  The result uses the same backing arrays as the
594    * byte string.
595    * <p>
596    * By returning a list, implementations of this method may be able to avoid
597    * copying even when there are multiple backing arrays.
598    *
599    * @return a list of wrapped bytes
600    */
asReadOnlyByteBufferList()601   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
602 
603   /**
604    * Constructs a new {@code String} by decoding the bytes using the
605    * specified charset.
606    *
607    * @param charsetName encode using this charset
608    * @return new string
609    * @throws UnsupportedEncodingException if charset isn't recognized
610    */
toString(String charsetName)611   public abstract String toString(String charsetName)
612       throws UnsupportedEncodingException;
613 
614   // =================================================================
615   // UTF-8 decoding
616 
617   /**
618    * Constructs a new {@code String} by decoding the bytes as UTF-8.
619    *
620    * @return new string using UTF-8 encoding
621    */
toStringUtf8()622   public String toStringUtf8() {
623     try {
624       return toString("UTF-8");
625     } catch (UnsupportedEncodingException e) {
626       throw new RuntimeException("UTF-8 not supported?", e);
627     }
628   }
629 
630   /**
631    * Tells whether this {@code ByteString} represents a well-formed UTF-8
632    * byte sequence, such that the original bytes can be converted to a
633    * String object and then round tripped back to bytes without loss.
634    *
635    * <p>More precisely, returns {@code true} whenever: <pre> {@code
636    * Arrays.equals(byteString.toByteArray(),
637    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
638    * }</pre>
639    *
640    * <p>This method returns {@code false} for "overlong" byte sequences,
641    * as well as for 3-byte sequences that would map to a surrogate
642    * character, in accordance with the restricted definition of UTF-8
643    * introduced in Unicode 3.1.  Note that the UTF-8 decoder included in
644    * Oracle's JDK has been modified to also reject "overlong" byte
645    * sequences, but (as of 2011) still accepts 3-byte surrogate
646    * character byte sequences.
647    *
648    * <p>See the Unicode Standard,</br>
649    * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
650    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
651    *
652    * @return whether the bytes in this {@code ByteString} are a
653    * well-formed UTF-8 byte sequence
654    */
isValidUtf8()655   public abstract boolean isValidUtf8();
656 
657   /**
658    * Tells whether the given byte sequence is a well-formed, malformed, or
659    * incomplete UTF-8 byte sequence.  This method accepts and returns a partial
660    * state result, allowing the bytes for a complete UTF-8 byte sequence to be
661    * composed from multiple {@code ByteString} segments.
662    *
663    * @param state either {@code 0} (if this is the initial decoding operation)
664    *     or the value returned from a call to a partial decoding method for the
665    *     previous bytes
666    * @param offset offset of the first byte to check
667    * @param length number of bytes to check
668    *
669    * @return {@code -1} if the partial byte sequence is definitely malformed,
670    * {@code 0} if it is well-formed (no additional input needed), or, if the
671    * byte sequence is "incomplete", i.e. apparently terminated in the middle of
672    * a character, an opaque integer "state" value containing enough information
673    * to decode the character when passed to a subsequent invocation of a
674    * partial decoding method.
675    */
partialIsValidUtf8(int state, int offset, int length)676   protected abstract int partialIsValidUtf8(int state, int offset, int length);
677 
678   // =================================================================
679   // equals() and hashCode()
680 
681   @Override
equals(Object o)682   public abstract boolean equals(Object o);
683 
684   /**
685    * Return a non-zero hashCode depending only on the sequence of bytes
686    * in this ByteString.
687    *
688    * @return hashCode value for this object
689    */
690   @Override
hashCode()691   public abstract int hashCode();
692 
693   // =================================================================
694   // Input stream
695 
696   /**
697    * Creates an {@code InputStream} which can be used to read the bytes.
698    * <p>
699    * The {@link InputStream} returned by this method is guaranteed to be
700    * completely non-blocking.  The method {@link InputStream#available()}
701    * returns the number of bytes remaining in the stream. The methods
702    * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)}
703    * and {@link InputStream#skip(long)} will read/skip as many bytes as are
704    * available.
705    * <p>
706    * The methods in the returned {@link InputStream} might <b>not</b> be
707    * thread safe.
708    *
709    * @return an input stream that returns the bytes of this byte string.
710    */
newInput()711   public abstract InputStream newInput();
712 
713   /**
714    * Creates a {@link CodedInputStream} which can be used to read the bytes.
715    * Using this is often more efficient than creating a {@link CodedInputStream}
716    * that wraps the result of {@link #newInput()}.
717    *
718    * @return stream based on wrapped data
719    */
newCodedInput()720   public abstract CodedInputStream newCodedInput();
721 
722   // =================================================================
723   // Output stream
724 
725   /**
726    * Creates a new {@link Output} with the given initial capacity. Call {@link
727    * Output#toByteString()} to create the {@code ByteString} instance.
728    * <p>
729    * A {@link ByteString.Output} offers the same functionality as a
730    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
731    * rather than a {@code byte} array.
732    *
733    * @param initialCapacity estimate of number of bytes to be written
734    * @return {@code OutputStream} for building a {@code ByteString}
735    */
newOutput(int initialCapacity)736   public static Output newOutput(int initialCapacity) {
737     return new Output(initialCapacity);
738   }
739 
740   /**
741    * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
742    * the {@code ByteString} instance.
743    * <p>
744    * A {@link ByteString.Output} offers the same functionality as a
745    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
746    * rather than a {@code byte array}.
747    *
748    * @return {@code OutputStream} for building a {@code ByteString}
749    */
newOutput()750   public static Output newOutput() {
751     return new Output(CONCATENATE_BY_COPY_SIZE);
752   }
753 
754   /**
755    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
756    * create the {@code ByteString} instance.
757    */
758   public static final class Output extends OutputStream {
759     // Implementation note.
760     // The public methods of this class must be synchronized.  ByteStrings
761     // are guaranteed to be immutable.  Without some sort of locking, it could
762     // be possible for one thread to call toByteSring(), while another thread
763     // is still modifying the underlying byte array.
764 
765     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
766     // argument passed by user, indicating initial capacity.
767     private final int initialCapacity;
768     // ByteStrings to be concatenated to create the result
769     private final ArrayList<ByteString> flushedBuffers;
770     // Total number of bytes in the ByteStrings of flushedBuffers
771     private int flushedBuffersTotalBytes;
772     // Current buffer to which we are writing
773     private byte[] buffer;
774     // Location in buffer[] to which we write the next byte.
775     private int bufferPos;
776 
777     /**
778      * Creates a new ByteString output stream with the specified
779      * initial capacity.
780      *
781      * @param initialCapacity  the initial capacity of the output stream.
782      */
Output(int initialCapacity)783     Output(int initialCapacity) {
784       if (initialCapacity < 0) {
785         throw new IllegalArgumentException("Buffer size < 0");
786       }
787       this.initialCapacity = initialCapacity;
788       this.flushedBuffers = new ArrayList<ByteString>();
789       this.buffer = new byte[initialCapacity];
790     }
791 
792     @Override
write(int b)793     public synchronized void write(int b) {
794       if (bufferPos == buffer.length) {
795         flushFullBuffer(1);
796       }
797       buffer[bufferPos++] = (byte)b;
798     }
799 
800     @Override
write(byte[] b, int offset, int length)801     public synchronized void write(byte[] b, int offset, int length)  {
802       if (length <= buffer.length - bufferPos) {
803         // The bytes can fit into the current buffer.
804         System.arraycopy(b, offset, buffer, bufferPos, length);
805         bufferPos += length;
806       } else {
807         // Use up the current buffer
808         int copySize  = buffer.length - bufferPos;
809         System.arraycopy(b, offset, buffer, bufferPos, copySize);
810         offset += copySize;
811         length -= copySize;
812         // Flush the buffer, and get a new buffer at least big enough to cover
813         // what we still need to output
814         flushFullBuffer(length);
815         System.arraycopy(b, offset, buffer, 0 /* count */, length);
816         bufferPos = length;
817       }
818     }
819 
820     /**
821      * Creates a byte string. Its size is the current size of this output
822      * stream and its output has been copied to it.
823      *
824      * @return  the current contents of this output stream, as a byte string.
825      */
toByteString()826     public synchronized ByteString toByteString() {
827       flushLastBuffer();
828       return ByteString.copyFrom(flushedBuffers);
829     }
830 
831     /**
832      * Implement java.util.Arrays.copyOf() for jdk 1.5.
833      */
copyArray(byte[] buffer, int length)834     private byte[] copyArray(byte[] buffer, int length) {
835       byte[] result = new byte[length];
836       System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length));
837       return result;
838     }
839 
840     /**
841      * Writes the complete contents of this byte array output stream to
842      * the specified output stream argument.
843      *
844      * @param out the output stream to which to write the data.
845      * @throws IOException  if an I/O error occurs.
846      */
writeTo(OutputStream out)847     public void writeTo(OutputStream out) throws IOException {
848       ByteString[] cachedFlushBuffers;
849       byte[] cachedBuffer;
850       int cachedBufferPos;
851       synchronized (this) {
852         // Copy the information we need into local variables so as to hold
853         // the lock for as short a time as possible.
854         cachedFlushBuffers =
855             flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
856         cachedBuffer = buffer;
857         cachedBufferPos = bufferPos;
858       }
859       for (ByteString byteString : cachedFlushBuffers) {
860         byteString.writeTo(out);
861       }
862 
863       out.write(copyArray(cachedBuffer, cachedBufferPos));
864     }
865 
866     /**
867      * Returns the current size of the output stream.
868      *
869      * @return  the current size of the output stream
870      */
size()871     public synchronized int size() {
872       return flushedBuffersTotalBytes + bufferPos;
873     }
874 
875     /**
876      * Resets this stream, so that all currently accumulated output in the
877      * output stream is discarded. The output stream can be used again,
878      * reusing the already allocated buffer space.
879      */
reset()880     public synchronized void reset() {
881       flushedBuffers.clear();
882       flushedBuffersTotalBytes = 0;
883       bufferPos = 0;
884     }
885 
886     @Override
toString()887     public String toString() {
888       return String.format("<ByteString.Output@%s size=%d>",
889           Integer.toHexString(System.identityHashCode(this)), size());
890     }
891 
892     /**
893      * Internal function used by writers.  The current buffer is full, and the
894      * writer needs a new buffer whose size is at least the specified minimum
895      * size.
896      */
flushFullBuffer(int minSize)897     private void flushFullBuffer(int minSize)  {
898       flushedBuffers.add(new LiteralByteString(buffer));
899       flushedBuffersTotalBytes += buffer.length;
900       // We want to increase our total capacity by 50%, but as a minimum,
901       // the new buffer should also at least be >= minSize and
902       // >= initial Capacity.
903       int newSize = Math.max(initialCapacity,
904           Math.max(minSize, flushedBuffersTotalBytes >>> 1));
905       buffer = new byte[newSize];
906       bufferPos = 0;
907     }
908 
909     /**
910      * Internal function used by {@link #toByteString()}. The current buffer may
911      * or may not be full, but it needs to be flushed.
912      */
flushLastBuffer()913     private void flushLastBuffer()  {
914       if (bufferPos < buffer.length) {
915         if (bufferPos > 0) {
916           byte[] bufferCopy = copyArray(buffer, bufferPos);
917           flushedBuffers.add(new LiteralByteString(bufferCopy));
918         }
919         // We reuse this buffer for further writes.
920       } else {
921         // Buffer is completely full.  Huzzah.
922         flushedBuffers.add(new LiteralByteString(buffer));
923         // 99% of the time, we're not going to use this OutputStream again.
924         // We set buffer to an empty byte stream so that we're handling this
925         // case without wasting space.  In the rare case that more writes
926         // *do* occur, this empty buffer will be flushed and an appropriately
927         // sized new buffer will be created.
928         buffer = EMPTY_BYTE_ARRAY;
929       }
930       flushedBuffersTotalBytes += bufferPos;
931       bufferPos = 0;
932     }
933   }
934 
935   /**
936    * Constructs a new {@code ByteString} builder, which allows you to
937    * efficiently construct a {@code ByteString} by writing to a {@link
938    * CodedOutputStream}. Using this is much more efficient than calling {@code
939    * newOutput()} and wrapping that in a {@code CodedOutputStream}.
940    *
941    * <p>This is package-private because it's a somewhat confusing interface.
942    * Users can call {@link Message#toByteString()} instead of calling this
943    * directly.
944    *
945    * @param size The target byte size of the {@code ByteString}.  You must write
946    *     exactly this many bytes before building the result.
947    * @return the builder
948    */
newCodedBuilder(int size)949   static CodedBuilder newCodedBuilder(int size) {
950     return new CodedBuilder(size);
951   }
952 
953   /** See {@link ByteString#newCodedBuilder(int)}. */
954   static final class CodedBuilder {
955     private final CodedOutputStream output;
956     private final byte[] buffer;
957 
CodedBuilder(int size)958     private CodedBuilder(int size) {
959       buffer = new byte[size];
960       output = CodedOutputStream.newInstance(buffer);
961     }
962 
build()963     public ByteString build() {
964       output.checkNoSpaceLeft();
965 
966       // We can be confident that the CodedOutputStream will not modify the
967       // underlying bytes anymore because it already wrote all of them.  So,
968       // no need to make a copy.
969       return new LiteralByteString(buffer);
970     }
971 
getCodedOutput()972     public CodedOutputStream getCodedOutput() {
973       return output;
974     }
975   }
976 
977   // =================================================================
978   // Methods {@link RopeByteString} needs on instances, which aren't part of the
979   // public API.
980 
981   /**
982    * Return the depth of the tree representing this {@code ByteString}, if any,
983    * whose root is this node. If this is a leaf node, return 0.
984    *
985    * @return tree depth or zero
986    */
getTreeDepth()987   protected abstract int getTreeDepth();
988 
989   /**
990    * Return {@code true} if this ByteString is literal (a leaf node) or a
991    * flat-enough tree in the sense of {@link RopeByteString}.
992    *
993    * @return true if the tree is flat enough
994    */
isBalanced()995   protected abstract boolean isBalanced();
996 
997   /**
998    * Return the cached hash code if available.
999    *
1000    * @return value of cached hash code or 0 if not computed yet
1001    */
peekCachedHashCode()1002   protected abstract int peekCachedHashCode();
1003 
1004   /**
1005    * Compute the hash across the value bytes starting with the given hash, and
1006    * return the result.  This is used to compute the hash across strings
1007    * represented as a set of pieces by allowing the hash computation to be
1008    * continued from piece to piece.
1009    *
1010    * @param h starting hash value
1011    * @param offset offset into this value to start looking at data values
1012    * @param length number of data values to include in the hash computation
1013    * @return ending hash value
1014    */
partialHash(int h, int offset, int length)1015   protected abstract int partialHash(int h, int offset, int length);
1016 
1017   @Override
toString()1018   public String toString() {
1019     return String.format("<ByteString@%s size=%d>",
1020         Integer.toHexString(System.identityHashCode(this)), size());
1021   }
1022 }
1023