1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import java.io.IOException;
34 import java.io.InputStream;
35 import java.io.OutputStream;
36 import java.io.UnsupportedEncodingException;
37 import java.io.ByteArrayInputStream;
38 import java.nio.ByteBuffer;
39 import java.util.ArrayList;
40 import java.util.Arrays;
41 import java.util.Iterator;
42 import java.util.List;
43 import java.util.NoSuchElementException;
44 import java.util.Stack;
45 
46 /**
47  * Class to represent {@code ByteStrings} formed by concatenation of other
48  * ByteStrings, without copying the data in the pieces. The concatenation is
49  * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
50  *
51  * <p>Most of the operation here is inspired by the now-famous paper <a
52  * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
53  * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and
54  * michael plass
55  *
56  * <p>The algorithms described in the paper have been implemented for character
57  * strings in {@link com.google.common.string.Rope} and in the c++ class {@code
58  * cord.cc}.
59  *
60  * <p>Fundamentally the Rope algorithm represents the collection of pieces as a
61  * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum
62  * sequence length, sequences that are too short relative to their depth cause a
63  * tree rebalance.  More precisely, a tree of depth d is "balanced" in the
64  * terminology of BAP95 if its length is at least F(d+2), where F(n) is the
65  * n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum
66  * lengths 1, 2, 3, 5, 8, 13,...
67  *
68  * @author carlanton@google.com (Carl Haverl)
69  */
70 class RopeByteString extends ByteString {
71 
72   /**
73    * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of
74    * depth n is "balanced", i.e flat enough, if its length is at least Fn+2,
75    * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at
76    * least 2, of depth 4 must have length >= 8, etc.
77    *
78    * <p>There's nothing special about using the Fibonacci numbers for this, but
79    * they are a reasonable sequence for encapsulating the idea that we are OK
80    * with longer strings being encoded in deeper binary trees.
81    *
82    * <p>For 32-bit integers, this array has length 46.
83    */
84   private static final int[] minLengthByDepth;
85 
86   static {
87     // Dynamically generate the list of Fibonacci numbers the first time this
88     // class is accessed.
89     List<Integer> numbers = new ArrayList<Integer>();
90 
91     // we skip the first Fibonacci number (1).  So instead of: 1 1 2 3 5 8 ...
92     // we have: 1 2 3 5 8 ...
93     int f1 = 1;
94     int f2 = 1;
95 
96     // get all the values until we roll over.
97     while (f2 > 0) {
98       numbers.add(f2);
99       int temp = f1 + f2;
100       f1 = f2;
101       f2 = temp;
102     }
103 
104     // we include this here so that we can index this array to [x + 1] in the
105     // loops below.
106     numbers.add(Integer.MAX_VALUE);
107     minLengthByDepth = new int[numbers.size()];
108     for (int i = 0; i < minLengthByDepth.length; i++) {
109       // unbox all the values
110       minLengthByDepth[i] = numbers.get(i);
111     }
112   }
113 
114   private final int totalLength;
115   private final ByteString left;
116   private final ByteString right;
117   private final int leftLength;
118   private final int treeDepth;
119 
120   /**
121    * Create a new RopeByteString, which can be thought of as a new tree node, by
122    * recording references to the two given strings.
123    *
124    * @param left  string on the left of this node, should have {@code size() >
125    *              0}
126    * @param right string on the right of this node, should have {@code size() >
127    *              0}
128    */
RopeByteString(ByteString left, ByteString right)129   private RopeByteString(ByteString left, ByteString right) {
130     this.left = left;
131     this.right = right;
132     leftLength = left.size();
133     totalLength = leftLength + right.size();
134     treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
135   }
136 
137   /**
138    * Concatenate the given strings while performing various optimizations to
139    * slow the growth rate of tree depth and tree node count. The result is
140    * either a {@link LiteralByteString} or a {@link RopeByteString}
141    * depending on which optimizations, if any, were applied.
142    *
143    * <p>Small pieces of length less than {@link
144    * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
145    * BAP95.  Large pieces are referenced without copy.
146    *
147    * @param left  string on the left
148    * @param right string on the right
149    * @return concatenation representing the same sequence as the given strings
150    */
concatenate(ByteString left, ByteString right)151   static ByteString concatenate(ByteString left, ByteString right) {
152     ByteString result;
153     RopeByteString leftRope =
154         (left instanceof RopeByteString) ? (RopeByteString) left : null;
155     if (right.size() == 0) {
156       result = left;
157     } else if (left.size() == 0) {
158       result = right;
159     } else {
160       int newLength = left.size() + right.size();
161       if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
162         // Optimization from BAP95: For short (leaves in paper, but just short
163         // here) total length, do a copy of data to a new leaf.
164         result = concatenateBytes(left, right);
165       } else if (leftRope != null
166           && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
167         // Optimization from BAP95: As an optimization of the case where the
168         // ByteString is constructed by repeated concatenate, recognize the case
169         // where a short string is concatenated to a left-hand node whose
170         // right-hand branch is short.  In the paper this applies to leaves, but
171         // we just look at the length here. This has the advantage of shedding
172         // references to unneeded data when substrings have been taken.
173         //
174         // When we recognize this case, we do a copy of the data and create a
175         // new parent node so that the depth of the result is the same as the
176         // given left tree.
177         ByteString newRight = concatenateBytes(leftRope.right, right);
178         result = new RopeByteString(leftRope.left, newRight);
179       } else if (leftRope != null
180           && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
181           && leftRope.getTreeDepth() > right.getTreeDepth()) {
182         // Typically for concatenate-built strings the left-side is deeper than
183         // the right.  This is our final attempt to concatenate without
184         // increasing the tree depth.  We'll redo the the node on the RHS.  This
185         // is yet another optimization for building the string by repeatedly
186         // concatenating on the right.
187         ByteString newRight = new RopeByteString(leftRope.right, right);
188         result = new RopeByteString(leftRope.left, newRight);
189       } else {
190         // Fine, we'll add a node and increase the tree depth--unless we
191         // rebalance ;^)
192         int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
193         if (newLength >= minLengthByDepth[newDepth]) {
194           // The tree is shallow enough, so don't rebalance
195           result = new RopeByteString(left, right);
196         } else {
197           result = new Balancer().balance(left, right);
198         }
199       }
200     }
201     return result;
202   }
203 
204   /**
205    * Concatenates two strings by copying data values. This is called in a few
206    * cases in order to reduce the growth of the number of tree nodes.
207    *
208    * @param left  string on the left
209    * @param right string on the right
210    * @return string formed by copying data bytes
211    */
concatenateBytes(ByteString left, ByteString right)212   private static LiteralByteString concatenateBytes(ByteString left,
213       ByteString right) {
214     int leftSize = left.size();
215     int rightSize = right.size();
216     byte[] bytes = new byte[leftSize + rightSize];
217     left.copyTo(bytes, 0, 0, leftSize);
218     right.copyTo(bytes, 0, leftSize, rightSize);
219     return new LiteralByteString(bytes);  // Constructor wraps bytes
220   }
221 
222   /**
223    * Create a new RopeByteString for testing only while bypassing all the
224    * defenses of {@link #concatenate(ByteString, ByteString)}. This allows
225    * testing trees of specific structure. We are also able to insert empty
226    * leaves, though these are dis-allowed, so that we can make sure the
227    * implementation can withstand their presence.
228    *
229    * @param left  string on the left of this node
230    * @param right string on the right of this node
231    * @return an unsafe instance for testing only
232    */
newInstanceForTest(ByteString left, ByteString right)233   static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
234     return new RopeByteString(left, right);
235   }
236 
237   /**
238    * Gets the byte at the given index.
239    * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility
240    * reasons although it would more properly be {@link
241    * IndexOutOfBoundsException}.
242    *
243    * @param index index of byte
244    * @return the value
245    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
246    */
247   @Override
byteAt(int index)248   public byte byteAt(int index) {
249     if (index < 0) {
250       throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
251     }
252     if (index > totalLength) {
253       throw new ArrayIndexOutOfBoundsException(
254           "Index > length: " + index + ", " + totalLength);
255     }
256 
257     byte result;
258     // Find the relevant piece by recursive descent
259     if (index < leftLength) {
260       result = left.byteAt(index);
261     } else {
262       result = right.byteAt(index - leftLength);
263     }
264     return result;
265   }
266 
267   @Override
size()268   public int size() {
269     return totalLength;
270   }
271 
272   // =================================================================
273   // Pieces
274 
275   @Override
getTreeDepth()276   protected int getTreeDepth() {
277     return treeDepth;
278   }
279 
280   /**
281    * Determines if the tree is balanced according to BAP95, which means the tree
282    * is flat-enough with respect to the bounds. Note that this definition of
283    * balanced is one where sub-trees of balanced trees are not necessarily
284    * balanced.
285    *
286    * @return true if the tree is balanced
287    */
288   @Override
isBalanced()289   protected boolean isBalanced() {
290     return totalLength >= minLengthByDepth[treeDepth];
291   }
292 
293   /**
294    * Takes a substring of this one. This involves recursive descent along the
295    * left and right edges of the substring, and referencing any wholly contained
296    * segments in between. Any leaf nodes entirely uninvolved in the substring
297    * will not be referenced by the substring.
298    *
299    * <p>Substrings of {@code length < 2} should result in at most a single
300    * recursive call chain, terminating at a leaf node. Thus the result will be a
301    * {@link LiteralByteString}. {@link #RopeByteString(ByteString,
302    * ByteString)}.
303    *
304    * @param beginIndex start at this index
305    * @param endIndex   the last character is the one before this index
306    * @return substring leaf node or tree
307    */
308   @Override
substring(int beginIndex, int endIndex)309   public ByteString substring(int beginIndex, int endIndex) {
310     if (beginIndex < 0) {
311       throw new IndexOutOfBoundsException(
312           "Beginning index: " + beginIndex + " < 0");
313     }
314     if (endIndex > totalLength) {
315       throw new IndexOutOfBoundsException(
316           "End index: " + endIndex + " > " + totalLength);
317     }
318     int substringLength = endIndex - beginIndex;
319     if (substringLength < 0) {
320       throw new IndexOutOfBoundsException(
321           "Beginning index larger than ending index: " + beginIndex + ", "
322               + endIndex);
323     }
324 
325     ByteString result;
326     if (substringLength == 0) {
327       // Empty substring
328       result = ByteString.EMPTY;
329     } else if (substringLength == totalLength) {
330       // The whole string
331       result = this;
332     } else {
333       // Proper substring
334       if (endIndex <= leftLength) {
335         // Substring on the left
336         result = left.substring(beginIndex, endIndex);
337       } else if (beginIndex >= leftLength) {
338         // Substring on the right
339         result = right
340             .substring(beginIndex - leftLength, endIndex - leftLength);
341       } else {
342         // Split substring
343         ByteString leftSub = left.substring(beginIndex);
344         ByteString rightSub = right.substring(0, endIndex - leftLength);
345         // Intentionally not rebalancing, since in many cases these two
346         // substrings will already be less deep than the top-level
347         // RopeByteString we're taking a substring of.
348         result = new RopeByteString(leftSub, rightSub);
349       }
350     }
351     return result;
352   }
353 
354   // =================================================================
355   // ByteString -> byte[]
356 
357   @Override
copyToInternal(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)358   protected void copyToInternal(byte[] target, int sourceOffset,
359       int targetOffset, int numberToCopy) {
360    if (sourceOffset + numberToCopy <= leftLength) {
361       left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
362     } else if (sourceOffset >= leftLength) {
363       right.copyToInternal(target, sourceOffset - leftLength, targetOffset,
364           numberToCopy);
365     } else {
366       int leftLength = this.leftLength - sourceOffset;
367       left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
368       right.copyToInternal(target, 0, targetOffset + leftLength,
369           numberToCopy - leftLength);
370     }
371   }
372 
373   @Override
copyTo(ByteBuffer target)374   public void copyTo(ByteBuffer target) {
375     left.copyTo(target);
376     right.copyTo(target);
377   }
378 
379   @Override
asReadOnlyByteBuffer()380   public ByteBuffer asReadOnlyByteBuffer() {
381     ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
382     return byteBuffer.asReadOnlyBuffer();
383   }
384 
385   @Override
asReadOnlyByteBufferList()386   public List<ByteBuffer> asReadOnlyByteBufferList() {
387     // Walk through the list of LiteralByteString's that make up this
388     // rope, and add each one as a read-only ByteBuffer.
389     List<ByteBuffer> result = new ArrayList<ByteBuffer>();
390     PieceIterator pieces = new PieceIterator(this);
391     while (pieces.hasNext()) {
392       LiteralByteString byteString = pieces.next();
393       result.add(byteString.asReadOnlyByteBuffer());
394     }
395     return result;
396   }
397 
398   @Override
writeTo(OutputStream outputStream)399   public void writeTo(OutputStream outputStream) throws IOException {
400     left.writeTo(outputStream);
401     right.writeTo(outputStream);
402   }
403 
404   @Override
writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)405   void writeToInternal(OutputStream out, int sourceOffset,
406       int numberToWrite) throws IOException {
407     if (sourceOffset + numberToWrite <= leftLength) {
408       left.writeToInternal(out, sourceOffset, numberToWrite);
409     } else if (sourceOffset >= leftLength) {
410       right.writeToInternal(out, sourceOffset - leftLength, numberToWrite);
411     } else {
412       int numberToWriteInLeft = leftLength - sourceOffset;
413       left.writeToInternal(out, sourceOffset, numberToWriteInLeft);
414       right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft);
415     }
416   }
417 
418   @Override
toString(String charsetName)419   public String toString(String charsetName)
420       throws UnsupportedEncodingException {
421     return new String(toByteArray(), charsetName);
422   }
423 
424   // =================================================================
425   // UTF-8 decoding
426 
427   @Override
isValidUtf8()428   public boolean isValidUtf8() {
429     int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
430     int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
431     return state == Utf8.COMPLETE;
432   }
433 
434   @Override
partialIsValidUtf8(int state, int offset, int length)435   protected int partialIsValidUtf8(int state, int offset, int length) {
436     int toIndex = offset + length;
437     if (toIndex <= leftLength) {
438       return left.partialIsValidUtf8(state, offset, length);
439     } else if (offset >= leftLength) {
440       return right.partialIsValidUtf8(state, offset - leftLength, length);
441     } else {
442       int leftLength = this.leftLength - offset;
443       int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
444       return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
445     }
446   }
447 
448   // =================================================================
449   // equals() and hashCode()
450 
451   @Override
equals(Object other)452   public boolean equals(Object other) {
453     if (other == this) {
454       return true;
455     }
456     if (!(other instanceof ByteString)) {
457       return false;
458     }
459 
460     ByteString otherByteString = (ByteString) other;
461     if (totalLength != otherByteString.size()) {
462       return false;
463     }
464     if (totalLength == 0) {
465       return true;
466     }
467 
468     // You don't really want to be calling equals on long strings, but since
469     // we cache the hashCode, we effectively cache inequality. We use the cached
470     // hashCode if it's already computed.  It's arguable we should compute the
471     // hashCode here, and if we're going to be testing a bunch of byteStrings,
472     // it might even make sense.
473     if (hash != 0) {
474       int cachedOtherHash = otherByteString.peekCachedHashCode();
475       if (cachedOtherHash != 0 && hash != cachedOtherHash) {
476         return false;
477       }
478     }
479 
480     return equalsFragments(otherByteString);
481   }
482 
483   /**
484    * Determines if this string is equal to another of the same length by
485    * iterating over the leaf nodes. On each step of the iteration, the
486    * overlapping segments of the leaves are compared.
487    *
488    * @param other string of the same length as this one
489    * @return true if the values of this string equals the value of the given
490    *         one
491    */
equalsFragments(ByteString other)492   private boolean equalsFragments(ByteString other) {
493     int thisOffset = 0;
494     Iterator<LiteralByteString> thisIter = new PieceIterator(this);
495     LiteralByteString thisString = thisIter.next();
496 
497     int thatOffset = 0;
498     Iterator<LiteralByteString> thatIter = new PieceIterator(other);
499     LiteralByteString thatString = thatIter.next();
500 
501     int pos = 0;
502     while (true) {
503       int thisRemaining = thisString.size() - thisOffset;
504       int thatRemaining = thatString.size() - thatOffset;
505       int bytesToCompare = Math.min(thisRemaining, thatRemaining);
506 
507       // At least one of the offsets will be zero
508       boolean stillEqual = (thisOffset == 0)
509           ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
510           : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
511       if (!stillEqual) {
512         return false;
513       }
514 
515       pos += bytesToCompare;
516       if (pos >= totalLength) {
517         if (pos == totalLength) {
518           return true;
519         }
520         throw new IllegalStateException();
521       }
522       // We always get to the end of at least one of the pieces
523       if (bytesToCompare == thisRemaining) { // If reached end of this
524         thisOffset = 0;
525         thisString = thisIter.next();
526       } else {
527         thisOffset += bytesToCompare;
528       }
529       if (bytesToCompare == thatRemaining) { // If reached end of that
530         thatOffset = 0;
531         thatString = thatIter.next();
532       } else {
533         thatOffset += bytesToCompare;
534       }
535     }
536   }
537 
538   /**
539    * Cached hash value.  Intentionally accessed via a data race, which is safe
540    * because of the Java Memory Model's "no out-of-thin-air values" guarantees
541    * for ints.
542    */
543   private int hash = 0;
544 
545   @Override
hashCode()546   public int hashCode() {
547     int h = hash;
548 
549     if (h == 0) {
550       h = totalLength;
551       h = partialHash(h, 0, totalLength);
552       if (h == 0) {
553         h = 1;
554       }
555       hash = h;
556     }
557     return h;
558   }
559 
560   @Override
peekCachedHashCode()561   protected int peekCachedHashCode() {
562     return hash;
563   }
564 
565   @Override
partialHash(int h, int offset, int length)566   protected int partialHash(int h, int offset, int length) {
567     int toIndex = offset + length;
568     if (toIndex <= leftLength) {
569       return left.partialHash(h, offset, length);
570     } else if (offset >= leftLength) {
571       return right.partialHash(h, offset - leftLength, length);
572     } else {
573       int leftLength = this.leftLength - offset;
574       int leftPartial = left.partialHash(h, offset, leftLength);
575       return right.partialHash(leftPartial, 0, length - leftLength);
576     }
577   }
578 
579   // =================================================================
580   // Input stream
581 
582   @Override
newCodedInput()583   public CodedInputStream newCodedInput() {
584     return CodedInputStream.newInstance(new RopeInputStream());
585   }
586 
587   @Override
newInput()588   public InputStream newInput() {
589     return new RopeInputStream();
590   }
591 
592   /**
593    * This class implements the balancing algorithm of BAP95. In the paper the
594    * authors use an array to keep track of pieces, while here we use a stack.
595    * The tree is balanced by traversing subtrees in left to right order, and the
596    * stack always contains the part of the string we've traversed so far.
597    *
598    * <p>One surprising aspect of the algorithm is the result of balancing is not
599    * necessarily balanced, though it is nearly balanced.  For details, see
600    * BAP95.
601    */
602   private static class Balancer {
603     // Stack containing the part of the string, starting from the left, that
604     // we've already traversed.  The final string should be the equivalent of
605     // concatenating the strings on the stack from bottom to top.
606     private final Stack<ByteString> prefixesStack = new Stack<ByteString>();
607 
balance(ByteString left, ByteString right)608     private ByteString balance(ByteString left, ByteString right) {
609       doBalance(left);
610       doBalance(right);
611 
612       // Sweep stack to gather the result
613       ByteString partialString = prefixesStack.pop();
614       while (!prefixesStack.isEmpty()) {
615         ByteString newLeft = prefixesStack.pop();
616         partialString = new RopeByteString(newLeft, partialString);
617       }
618       // We should end up with a RopeByteString since at a minimum we will
619       // create one from concatenating left and right
620       return partialString;
621     }
622 
doBalance(ByteString root)623     private void doBalance(ByteString root) {
624       // BAP95: Insert balanced subtrees whole. This means the result might not
625       // be balanced, leading to repeated rebalancings on concatenate. However,
626       // these rebalancings are shallow due to ignoring balanced subtrees, and
627       // relatively few calls to insert() result.
628       if (root.isBalanced()) {
629         insert(root);
630       } else if (root instanceof RopeByteString) {
631         RopeByteString rbs = (RopeByteString) root;
632         doBalance(rbs.left);
633         doBalance(rbs.right);
634       } else {
635         throw new IllegalArgumentException(
636             "Has a new type of ByteString been created? Found " +
637                 root.getClass());
638       }
639     }
640 
641     /**
642      * Push a string on the balance stack (BAP95).  BAP95 uses an array and
643      * calls the elements in the array 'bins'.  We instead use a stack, so the
644      * 'bins' of lengths are represented by differences between the elements of
645      * minLengthByDepth.
646      *
647      * <p>If the length bin for our string, and all shorter length bins, are
648      * empty, we just push it on the stack.  Otherwise, we need to start
649      * concatenating, putting the given string in the "middle" and continuing
650      * until we land in an empty length bin that matches the length of our
651      * concatenation.
652      *
653      * @param byteString string to place on the balance stack
654      */
insert(ByteString byteString)655     private void insert(ByteString byteString) {
656       int depthBin = getDepthBinForLength(byteString.size());
657       int binEnd = minLengthByDepth[depthBin + 1];
658 
659       // BAP95: Concatenate all trees occupying bins representing the length of
660       // our new piece or of shorter pieces, to the extent that is possible.
661       // The goal is to clear the bin which our piece belongs in, but that may
662       // not be entirely possible if there aren't enough longer bins occupied.
663       if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
664         prefixesStack.push(byteString);
665       } else {
666         int binStart = minLengthByDepth[depthBin];
667 
668         // Concatenate the subtrees of shorter length
669         ByteString newTree = prefixesStack.pop();
670         while (!prefixesStack.isEmpty()
671             && prefixesStack.peek().size() < binStart) {
672           ByteString left = prefixesStack.pop();
673           newTree = new RopeByteString(left, newTree);
674         }
675 
676         // Concatenate the given string
677         newTree = new RopeByteString(newTree, byteString);
678 
679         // Continue concatenating until we land in an empty bin
680         while (!prefixesStack.isEmpty()) {
681           depthBin = getDepthBinForLength(newTree.size());
682           binEnd = minLengthByDepth[depthBin + 1];
683           if (prefixesStack.peek().size() < binEnd) {
684             ByteString left = prefixesStack.pop();
685             newTree = new RopeByteString(left, newTree);
686           } else {
687             break;
688           }
689         }
690         prefixesStack.push(newTree);
691       }
692     }
693 
getDepthBinForLength(int length)694     private int getDepthBinForLength(int length) {
695       int depth = Arrays.binarySearch(minLengthByDepth, length);
696       if (depth < 0) {
697         // It wasn't an exact match, so convert to the index of the containing
698         // fragment, which is one less even than the insertion point.
699         int insertionPoint = -(depth + 1);
700         depth = insertionPoint - 1;
701       }
702 
703       return depth;
704     }
705   }
706 
707   /**
708    * This class is a continuable tree traversal, which keeps the state
709    * information which would exist on the stack in a recursive traversal instead
710    * on a stack of "Bread Crumbs". The maximum depth of the stack in this
711    * iterator is the same as the depth of the tree being traversed.
712    *
713    * <p>This iterator is used to implement
714    * {@link RopeByteString#equalsFragments(ByteString)}.
715    */
716   private static class PieceIterator implements Iterator<LiteralByteString> {
717 
718     private final Stack<RopeByteString> breadCrumbs =
719         new Stack<RopeByteString>();
720     private LiteralByteString next;
721 
PieceIterator(ByteString root)722     private PieceIterator(ByteString root) {
723       next = getLeafByLeft(root);
724     }
725 
getLeafByLeft(ByteString root)726     private LiteralByteString getLeafByLeft(ByteString root) {
727       ByteString pos = root;
728       while (pos instanceof RopeByteString) {
729         RopeByteString rbs = (RopeByteString) pos;
730         breadCrumbs.push(rbs);
731         pos = rbs.left;
732       }
733       return (LiteralByteString) pos;
734     }
735 
getNextNonEmptyLeaf()736     private LiteralByteString getNextNonEmptyLeaf() {
737       while (true) {
738         // Almost always, we go through this loop exactly once.  However, if
739         // we discover an empty string in the rope, we toss it and try again.
740         if (breadCrumbs.isEmpty()) {
741           return null;
742         } else {
743           LiteralByteString result = getLeafByLeft(breadCrumbs.pop().right);
744           if (!result.isEmpty()) {
745             return result;
746           }
747         }
748       }
749     }
750 
hasNext()751     public boolean hasNext() {
752       return next != null;
753     }
754 
755     /**
756      * Returns the next item and advances one {@code LiteralByteString}.
757      *
758      * @return next non-empty LiteralByteString or {@code null}
759      */
next()760     public LiteralByteString next() {
761       if (next == null) {
762         throw new NoSuchElementException();
763       }
764       LiteralByteString result = next;
765       next = getNextNonEmptyLeaf();
766       return result;
767     }
768 
remove()769     public void remove() {
770       throw new UnsupportedOperationException();
771     }
772   }
773 
774   // =================================================================
775   // ByteIterator
776 
777   @Override
iterator()778   public ByteIterator iterator() {
779     return new RopeByteIterator();
780   }
781 
782   private class RopeByteIterator implements ByteString.ByteIterator {
783 
784     private final PieceIterator pieces;
785     private ByteIterator bytes;
786     int bytesRemaining;
787 
RopeByteIterator()788     private RopeByteIterator() {
789       pieces = new PieceIterator(RopeByteString.this);
790       bytes = pieces.next().iterator();
791       bytesRemaining = size();
792     }
793 
hasNext()794     public boolean hasNext() {
795       return (bytesRemaining > 0);
796     }
797 
next()798     public Byte next() {
799       return nextByte(); // Does not instantiate a Byte
800     }
801 
nextByte()802     public byte nextByte() {
803       if (!bytes.hasNext()) {
804         bytes = pieces.next().iterator();
805       }
806       --bytesRemaining;
807       return bytes.nextByte();
808     }
809 
remove()810     public void remove() {
811       throw new UnsupportedOperationException();
812     }
813   }
814 
815   /**
816    * This class is the {@link RopeByteString} equivalent for
817    * {@link ByteArrayInputStream}.
818    */
819   private class RopeInputStream extends InputStream {
820     // Iterates through the pieces of the rope
821     private PieceIterator pieceIterator;
822     // The current piece
823     private LiteralByteString currentPiece;
824     // The size of the current piece
825     private int currentPieceSize;
826     // The index of the next byte to read in the current piece
827     private int currentPieceIndex;
828     // The offset of the start of the current piece in the rope byte string
829     private int currentPieceOffsetInRope;
830     // Offset in the buffer at which user called mark();
831     private int mark;
832 
RopeInputStream()833     public RopeInputStream() {
834       initialize();
835     }
836 
837     @Override
read(byte b[], int offset, int length)838     public int read(byte b[], int offset, int length)  {
839       if (b == null) {
840         throw new NullPointerException();
841       } else if (offset < 0 || length < 0 || length > b.length - offset) {
842         throw new IndexOutOfBoundsException();
843       }
844       return readSkipInternal(b, offset, length);
845     }
846 
847     @Override
skip(long length)848     public long skip(long length) {
849       if (length < 0) {
850         throw new IndexOutOfBoundsException();
851       } else if (length > Integer.MAX_VALUE) {
852         length = Integer.MAX_VALUE;
853       }
854       return readSkipInternal(null, 0, (int) length);
855     }
856 
857     /**
858      * Internal implementation of read and skip.  If b != null, then read the
859      * next {@code length} bytes into the buffer {@code b} at
860      * offset {@code offset}.  If b == null, then skip the next {@code length)
861      * bytes.
862      * <p>
863      * This method assumes that all error checking has already happened.
864      * <p>
865      * Returns the actual number of bytes read or skipped.
866      */
readSkipInternal(byte b[], int offset, int length)867     private int readSkipInternal(byte b[], int offset, int length)  {
868       int bytesRemaining = length;
869       while (bytesRemaining > 0) {
870         advanceIfCurrentPieceFullyRead();
871         if (currentPiece == null) {
872           if (bytesRemaining == length) {
873              // We didn't manage to read anything
874              return -1;
875            }
876           break;
877         } else {
878           // Copy the bytes from this piece.
879           int currentPieceRemaining = currentPieceSize - currentPieceIndex;
880           int count = Math.min(currentPieceRemaining, bytesRemaining);
881           if (b != null) {
882             currentPiece.copyTo(b, currentPieceIndex, offset, count);
883             offset += count;
884           }
885           currentPieceIndex += count;
886           bytesRemaining -= count;
887         }
888       }
889        // Return the number of bytes read.
890       return length - bytesRemaining;
891     }
892 
893     @Override
read()894     public int read() throws IOException {
895       advanceIfCurrentPieceFullyRead();
896       if (currentPiece == null) {
897         return -1;
898       } else {
899         return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
900       }
901     }
902 
903     @Override
available()904     public int available() throws IOException {
905       int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
906       return RopeByteString.this.size() - bytesRead;
907     }
908 
909     @Override
markSupported()910     public boolean markSupported() {
911       return true;
912     }
913 
914     @Override
mark(int readAheadLimit)915     public void mark(int readAheadLimit) {
916       // Set the mark to our position in the byte string
917       mark = currentPieceOffsetInRope + currentPieceIndex;
918     }
919 
920     @Override
reset()921     public synchronized void reset() {
922       // Just reinitialize and skip the specified number of bytes.
923       initialize();
924       readSkipInternal(null, 0, mark);
925     }
926 
927     /** Common initialization code used by both the constructor and reset() */
initialize()928     private void initialize() {
929       pieceIterator = new PieceIterator(RopeByteString.this);
930       currentPiece = pieceIterator.next();
931       currentPieceSize = currentPiece.size();
932       currentPieceIndex = 0;
933       currentPieceOffsetInRope = 0;
934     }
935 
936     /**
937      * Skips to the next piece if we have read all the data in the current
938      * piece.  Sets currentPiece to null if we have reached the end of the
939      * input.
940      */
advanceIfCurrentPieceFullyRead()941     private void advanceIfCurrentPieceFullyRead() {
942       if (currentPiece != null && currentPieceIndex == currentPieceSize) {
943         // Generally, we can only go through this loop at most once, since
944         // empty strings can't end up in a rope.  But better to test.
945         currentPieceOffsetInRope += currentPieceSize;
946         currentPieceIndex = 0;
947         if (pieceIterator.hasNext()) {
948           currentPiece = pieceIterator.next();
949           currentPieceSize = currentPiece.size();
950         } else {
951           currentPiece = null;
952           currentPieceSize = 0;
953         }
954       }
955     }
956   }
957 }
958