1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import java.io.IOException; 34 import java.io.InputStream; 35 import java.io.OutputStream; 36 import java.io.UnsupportedEncodingException; 37 import java.io.ByteArrayInputStream; 38 import java.nio.ByteBuffer; 39 import java.util.ArrayList; 40 import java.util.Arrays; 41 import java.util.Iterator; 42 import java.util.List; 43 import java.util.NoSuchElementException; 44 import java.util.Stack; 45 46 /** 47 * Class to represent {@code ByteStrings} formed by concatenation of other 48 * ByteStrings, without copying the data in the pieces. The concatenation is 49 * represented as a tree whose leaf nodes are each a {@link LiteralByteString}. 50 * 51 * <p>Most of the operation here is inspired by the now-famous paper <a 52 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 53 * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and 54 * michael plass 55 * 56 * <p>The algorithms described in the paper have been implemented for character 57 * strings in {@link com.google.common.string.Rope} and in the c++ class {@code 58 * cord.cc}. 59 * 60 * <p>Fundamentally the Rope algorithm represents the collection of pieces as a 61 * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum 62 * sequence length, sequences that are too short relative to their depth cause a 63 * tree rebalance. More precisely, a tree of depth d is "balanced" in the 64 * terminology of BAP95 if its length is at least F(d+2), where F(n) is the 65 * n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum 66 * lengths 1, 2, 3, 5, 8, 13,... 67 * 68 * @author carlanton@google.com (Carl Haverl) 69 */ 70 class RopeByteString extends ByteString { 71 72 /** 73 * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of 74 * depth n is "balanced", i.e flat enough, if its length is at least Fn+2, 75 * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at 76 * least 2, of depth 4 must have length >= 8, etc. 77 * 78 * <p>There's nothing special about using the Fibonacci numbers for this, but 79 * they are a reasonable sequence for encapsulating the idea that we are OK 80 * with longer strings being encoded in deeper binary trees. 81 * 82 * <p>For 32-bit integers, this array has length 46. 83 */ 84 private static final int[] minLengthByDepth; 85 86 static { 87 // Dynamically generate the list of Fibonacci numbers the first time this 88 // class is accessed. 89 List<Integer> numbers = new ArrayList<Integer>(); 90 91 // we skip the first Fibonacci number (1). So instead of: 1 1 2 3 5 8 ... 92 // we have: 1 2 3 5 8 ... 93 int f1 = 1; 94 int f2 = 1; 95 96 // get all the values until we roll over. 97 while (f2 > 0) { 98 numbers.add(f2); 99 int temp = f1 + f2; 100 f1 = f2; 101 f2 = temp; 102 } 103 104 // we include this here so that we can index this array to [x + 1] in the 105 // loops below. 106 numbers.add(Integer.MAX_VALUE); 107 minLengthByDepth = new int[numbers.size()]; 108 for (int i = 0; i < minLengthByDepth.length; i++) { 109 // unbox all the values 110 minLengthByDepth[i] = numbers.get(i); 111 } 112 } 113 114 private final int totalLength; 115 private final ByteString left; 116 private final ByteString right; 117 private final int leftLength; 118 private final int treeDepth; 119 120 /** 121 * Create a new RopeByteString, which can be thought of as a new tree node, by 122 * recording references to the two given strings. 123 * 124 * @param left string on the left of this node, should have {@code size() > 125 * 0} 126 * @param right string on the right of this node, should have {@code size() > 127 * 0} 128 */ RopeByteString(ByteString left, ByteString right)129 private RopeByteString(ByteString left, ByteString right) { 130 this.left = left; 131 this.right = right; 132 leftLength = left.size(); 133 totalLength = leftLength + right.size(); 134 treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; 135 } 136 137 /** 138 * Concatenate the given strings while performing various optimizations to 139 * slow the growth rate of tree depth and tree node count. The result is 140 * either a {@link LiteralByteString} or a {@link RopeByteString} 141 * depending on which optimizations, if any, were applied. 142 * 143 * <p>Small pieces of length less than {@link 144 * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in 145 * BAP95. Large pieces are referenced without copy. 146 * 147 * @param left string on the left 148 * @param right string on the right 149 * @return concatenation representing the same sequence as the given strings 150 */ concatenate(ByteString left, ByteString right)151 static ByteString concatenate(ByteString left, ByteString right) { 152 ByteString result; 153 RopeByteString leftRope = 154 (left instanceof RopeByteString) ? (RopeByteString) left : null; 155 if (right.size() == 0) { 156 result = left; 157 } else if (left.size() == 0) { 158 result = right; 159 } else { 160 int newLength = left.size() + right.size(); 161 if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) { 162 // Optimization from BAP95: For short (leaves in paper, but just short 163 // here) total length, do a copy of data to a new leaf. 164 result = concatenateBytes(left, right); 165 } else if (leftRope != null 166 && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) { 167 // Optimization from BAP95: As an optimization of the case where the 168 // ByteString is constructed by repeated concatenate, recognize the case 169 // where a short string is concatenated to a left-hand node whose 170 // right-hand branch is short. In the paper this applies to leaves, but 171 // we just look at the length here. This has the advantage of shedding 172 // references to unneeded data when substrings have been taken. 173 // 174 // When we recognize this case, we do a copy of the data and create a 175 // new parent node so that the depth of the result is the same as the 176 // given left tree. 177 ByteString newRight = concatenateBytes(leftRope.right, right); 178 result = new RopeByteString(leftRope.left, newRight); 179 } else if (leftRope != null 180 && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth() 181 && leftRope.getTreeDepth() > right.getTreeDepth()) { 182 // Typically for concatenate-built strings the left-side is deeper than 183 // the right. This is our final attempt to concatenate without 184 // increasing the tree depth. We'll redo the the node on the RHS. This 185 // is yet another optimization for building the string by repeatedly 186 // concatenating on the right. 187 ByteString newRight = new RopeByteString(leftRope.right, right); 188 result = new RopeByteString(leftRope.left, newRight); 189 } else { 190 // Fine, we'll add a node and increase the tree depth--unless we 191 // rebalance ;^) 192 int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; 193 if (newLength >= minLengthByDepth[newDepth]) { 194 // The tree is shallow enough, so don't rebalance 195 result = new RopeByteString(left, right); 196 } else { 197 result = new Balancer().balance(left, right); 198 } 199 } 200 } 201 return result; 202 } 203 204 /** 205 * Concatenates two strings by copying data values. This is called in a few 206 * cases in order to reduce the growth of the number of tree nodes. 207 * 208 * @param left string on the left 209 * @param right string on the right 210 * @return string formed by copying data bytes 211 */ concatenateBytes(ByteString left, ByteString right)212 private static LiteralByteString concatenateBytes(ByteString left, 213 ByteString right) { 214 int leftSize = left.size(); 215 int rightSize = right.size(); 216 byte[] bytes = new byte[leftSize + rightSize]; 217 left.copyTo(bytes, 0, 0, leftSize); 218 right.copyTo(bytes, 0, leftSize, rightSize); 219 return new LiteralByteString(bytes); // Constructor wraps bytes 220 } 221 222 /** 223 * Create a new RopeByteString for testing only while bypassing all the 224 * defenses of {@link #concatenate(ByteString, ByteString)}. This allows 225 * testing trees of specific structure. We are also able to insert empty 226 * leaves, though these are dis-allowed, so that we can make sure the 227 * implementation can withstand their presence. 228 * 229 * @param left string on the left of this node 230 * @param right string on the right of this node 231 * @return an unsafe instance for testing only 232 */ newInstanceForTest(ByteString left, ByteString right)233 static RopeByteString newInstanceForTest(ByteString left, ByteString right) { 234 return new RopeByteString(left, right); 235 } 236 237 /** 238 * Gets the byte at the given index. 239 * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility 240 * reasons although it would more properly be {@link 241 * IndexOutOfBoundsException}. 242 * 243 * @param index index of byte 244 * @return the value 245 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 246 */ 247 @Override byteAt(int index)248 public byte byteAt(int index) { 249 if (index < 0) { 250 throw new ArrayIndexOutOfBoundsException("Index < 0: " + index); 251 } 252 if (index > totalLength) { 253 throw new ArrayIndexOutOfBoundsException( 254 "Index > length: " + index + ", " + totalLength); 255 } 256 257 byte result; 258 // Find the relevant piece by recursive descent 259 if (index < leftLength) { 260 result = left.byteAt(index); 261 } else { 262 result = right.byteAt(index - leftLength); 263 } 264 return result; 265 } 266 267 @Override size()268 public int size() { 269 return totalLength; 270 } 271 272 // ================================================================= 273 // Pieces 274 275 @Override getTreeDepth()276 protected int getTreeDepth() { 277 return treeDepth; 278 } 279 280 /** 281 * Determines if the tree is balanced according to BAP95, which means the tree 282 * is flat-enough with respect to the bounds. Note that this definition of 283 * balanced is one where sub-trees of balanced trees are not necessarily 284 * balanced. 285 * 286 * @return true if the tree is balanced 287 */ 288 @Override isBalanced()289 protected boolean isBalanced() { 290 return totalLength >= minLengthByDepth[treeDepth]; 291 } 292 293 /** 294 * Takes a substring of this one. This involves recursive descent along the 295 * left and right edges of the substring, and referencing any wholly contained 296 * segments in between. Any leaf nodes entirely uninvolved in the substring 297 * will not be referenced by the substring. 298 * 299 * <p>Substrings of {@code length < 2} should result in at most a single 300 * recursive call chain, terminating at a leaf node. Thus the result will be a 301 * {@link LiteralByteString}. {@link #RopeByteString(ByteString, 302 * ByteString)}. 303 * 304 * @param beginIndex start at this index 305 * @param endIndex the last character is the one before this index 306 * @return substring leaf node or tree 307 */ 308 @Override substring(int beginIndex, int endIndex)309 public ByteString substring(int beginIndex, int endIndex) { 310 if (beginIndex < 0) { 311 throw new IndexOutOfBoundsException( 312 "Beginning index: " + beginIndex + " < 0"); 313 } 314 if (endIndex > totalLength) { 315 throw new IndexOutOfBoundsException( 316 "End index: " + endIndex + " > " + totalLength); 317 } 318 int substringLength = endIndex - beginIndex; 319 if (substringLength < 0) { 320 throw new IndexOutOfBoundsException( 321 "Beginning index larger than ending index: " + beginIndex + ", " 322 + endIndex); 323 } 324 325 ByteString result; 326 if (substringLength == 0) { 327 // Empty substring 328 result = ByteString.EMPTY; 329 } else if (substringLength == totalLength) { 330 // The whole string 331 result = this; 332 } else { 333 // Proper substring 334 if (endIndex <= leftLength) { 335 // Substring on the left 336 result = left.substring(beginIndex, endIndex); 337 } else if (beginIndex >= leftLength) { 338 // Substring on the right 339 result = right 340 .substring(beginIndex - leftLength, endIndex - leftLength); 341 } else { 342 // Split substring 343 ByteString leftSub = left.substring(beginIndex); 344 ByteString rightSub = right.substring(0, endIndex - leftLength); 345 // Intentionally not rebalancing, since in many cases these two 346 // substrings will already be less deep than the top-level 347 // RopeByteString we're taking a substring of. 348 result = new RopeByteString(leftSub, rightSub); 349 } 350 } 351 return result; 352 } 353 354 // ================================================================= 355 // ByteString -> byte[] 356 357 @Override copyToInternal(byte[] target, int sourceOffset, int targetOffset, int numberToCopy)358 protected void copyToInternal(byte[] target, int sourceOffset, 359 int targetOffset, int numberToCopy) { 360 if (sourceOffset + numberToCopy <= leftLength) { 361 left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 362 } else if (sourceOffset >= leftLength) { 363 right.copyToInternal(target, sourceOffset - leftLength, targetOffset, 364 numberToCopy); 365 } else { 366 int leftLength = this.leftLength - sourceOffset; 367 left.copyToInternal(target, sourceOffset, targetOffset, leftLength); 368 right.copyToInternal(target, 0, targetOffset + leftLength, 369 numberToCopy - leftLength); 370 } 371 } 372 373 @Override copyTo(ByteBuffer target)374 public void copyTo(ByteBuffer target) { 375 left.copyTo(target); 376 right.copyTo(target); 377 } 378 379 @Override asReadOnlyByteBuffer()380 public ByteBuffer asReadOnlyByteBuffer() { 381 ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray()); 382 return byteBuffer.asReadOnlyBuffer(); 383 } 384 385 @Override asReadOnlyByteBufferList()386 public List<ByteBuffer> asReadOnlyByteBufferList() { 387 // Walk through the list of LiteralByteString's that make up this 388 // rope, and add each one as a read-only ByteBuffer. 389 List<ByteBuffer> result = new ArrayList<ByteBuffer>(); 390 PieceIterator pieces = new PieceIterator(this); 391 while (pieces.hasNext()) { 392 LiteralByteString byteString = pieces.next(); 393 result.add(byteString.asReadOnlyByteBuffer()); 394 } 395 return result; 396 } 397 398 @Override writeTo(OutputStream outputStream)399 public void writeTo(OutputStream outputStream) throws IOException { 400 left.writeTo(outputStream); 401 right.writeTo(outputStream); 402 } 403 404 @Override writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)405 void writeToInternal(OutputStream out, int sourceOffset, 406 int numberToWrite) throws IOException { 407 if (sourceOffset + numberToWrite <= leftLength) { 408 left.writeToInternal(out, sourceOffset, numberToWrite); 409 } else if (sourceOffset >= leftLength) { 410 right.writeToInternal(out, sourceOffset - leftLength, numberToWrite); 411 } else { 412 int numberToWriteInLeft = leftLength - sourceOffset; 413 left.writeToInternal(out, sourceOffset, numberToWriteInLeft); 414 right.writeToInternal(out, 0, numberToWrite - numberToWriteInLeft); 415 } 416 } 417 418 @Override toString(String charsetName)419 public String toString(String charsetName) 420 throws UnsupportedEncodingException { 421 return new String(toByteArray(), charsetName); 422 } 423 424 // ================================================================= 425 // UTF-8 decoding 426 427 @Override isValidUtf8()428 public boolean isValidUtf8() { 429 int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength); 430 int state = right.partialIsValidUtf8(leftPartial, 0, right.size()); 431 return state == Utf8.COMPLETE; 432 } 433 434 @Override partialIsValidUtf8(int state, int offset, int length)435 protected int partialIsValidUtf8(int state, int offset, int length) { 436 int toIndex = offset + length; 437 if (toIndex <= leftLength) { 438 return left.partialIsValidUtf8(state, offset, length); 439 } else if (offset >= leftLength) { 440 return right.partialIsValidUtf8(state, offset - leftLength, length); 441 } else { 442 int leftLength = this.leftLength - offset; 443 int leftPartial = left.partialIsValidUtf8(state, offset, leftLength); 444 return right.partialIsValidUtf8(leftPartial, 0, length - leftLength); 445 } 446 } 447 448 // ================================================================= 449 // equals() and hashCode() 450 451 @Override equals(Object other)452 public boolean equals(Object other) { 453 if (other == this) { 454 return true; 455 } 456 if (!(other instanceof ByteString)) { 457 return false; 458 } 459 460 ByteString otherByteString = (ByteString) other; 461 if (totalLength != otherByteString.size()) { 462 return false; 463 } 464 if (totalLength == 0) { 465 return true; 466 } 467 468 // You don't really want to be calling equals on long strings, but since 469 // we cache the hashCode, we effectively cache inequality. We use the cached 470 // hashCode if it's already computed. It's arguable we should compute the 471 // hashCode here, and if we're going to be testing a bunch of byteStrings, 472 // it might even make sense. 473 if (hash != 0) { 474 int cachedOtherHash = otherByteString.peekCachedHashCode(); 475 if (cachedOtherHash != 0 && hash != cachedOtherHash) { 476 return false; 477 } 478 } 479 480 return equalsFragments(otherByteString); 481 } 482 483 /** 484 * Determines if this string is equal to another of the same length by 485 * iterating over the leaf nodes. On each step of the iteration, the 486 * overlapping segments of the leaves are compared. 487 * 488 * @param other string of the same length as this one 489 * @return true if the values of this string equals the value of the given 490 * one 491 */ equalsFragments(ByteString other)492 private boolean equalsFragments(ByteString other) { 493 int thisOffset = 0; 494 Iterator<LiteralByteString> thisIter = new PieceIterator(this); 495 LiteralByteString thisString = thisIter.next(); 496 497 int thatOffset = 0; 498 Iterator<LiteralByteString> thatIter = new PieceIterator(other); 499 LiteralByteString thatString = thatIter.next(); 500 501 int pos = 0; 502 while (true) { 503 int thisRemaining = thisString.size() - thisOffset; 504 int thatRemaining = thatString.size() - thatOffset; 505 int bytesToCompare = Math.min(thisRemaining, thatRemaining); 506 507 // At least one of the offsets will be zero 508 boolean stillEqual = (thisOffset == 0) 509 ? thisString.equalsRange(thatString, thatOffset, bytesToCompare) 510 : thatString.equalsRange(thisString, thisOffset, bytesToCompare); 511 if (!stillEqual) { 512 return false; 513 } 514 515 pos += bytesToCompare; 516 if (pos >= totalLength) { 517 if (pos == totalLength) { 518 return true; 519 } 520 throw new IllegalStateException(); 521 } 522 // We always get to the end of at least one of the pieces 523 if (bytesToCompare == thisRemaining) { // If reached end of this 524 thisOffset = 0; 525 thisString = thisIter.next(); 526 } else { 527 thisOffset += bytesToCompare; 528 } 529 if (bytesToCompare == thatRemaining) { // If reached end of that 530 thatOffset = 0; 531 thatString = thatIter.next(); 532 } else { 533 thatOffset += bytesToCompare; 534 } 535 } 536 } 537 538 /** 539 * Cached hash value. Intentionally accessed via a data race, which is safe 540 * because of the Java Memory Model's "no out-of-thin-air values" guarantees 541 * for ints. 542 */ 543 private int hash = 0; 544 545 @Override hashCode()546 public int hashCode() { 547 int h = hash; 548 549 if (h == 0) { 550 h = totalLength; 551 h = partialHash(h, 0, totalLength); 552 if (h == 0) { 553 h = 1; 554 } 555 hash = h; 556 } 557 return h; 558 } 559 560 @Override peekCachedHashCode()561 protected int peekCachedHashCode() { 562 return hash; 563 } 564 565 @Override partialHash(int h, int offset, int length)566 protected int partialHash(int h, int offset, int length) { 567 int toIndex = offset + length; 568 if (toIndex <= leftLength) { 569 return left.partialHash(h, offset, length); 570 } else if (offset >= leftLength) { 571 return right.partialHash(h, offset - leftLength, length); 572 } else { 573 int leftLength = this.leftLength - offset; 574 int leftPartial = left.partialHash(h, offset, leftLength); 575 return right.partialHash(leftPartial, 0, length - leftLength); 576 } 577 } 578 579 // ================================================================= 580 // Input stream 581 582 @Override newCodedInput()583 public CodedInputStream newCodedInput() { 584 return CodedInputStream.newInstance(new RopeInputStream()); 585 } 586 587 @Override newInput()588 public InputStream newInput() { 589 return new RopeInputStream(); 590 } 591 592 /** 593 * This class implements the balancing algorithm of BAP95. In the paper the 594 * authors use an array to keep track of pieces, while here we use a stack. 595 * The tree is balanced by traversing subtrees in left to right order, and the 596 * stack always contains the part of the string we've traversed so far. 597 * 598 * <p>One surprising aspect of the algorithm is the result of balancing is not 599 * necessarily balanced, though it is nearly balanced. For details, see 600 * BAP95. 601 */ 602 private static class Balancer { 603 // Stack containing the part of the string, starting from the left, that 604 // we've already traversed. The final string should be the equivalent of 605 // concatenating the strings on the stack from bottom to top. 606 private final Stack<ByteString> prefixesStack = new Stack<ByteString>(); 607 balance(ByteString left, ByteString right)608 private ByteString balance(ByteString left, ByteString right) { 609 doBalance(left); 610 doBalance(right); 611 612 // Sweep stack to gather the result 613 ByteString partialString = prefixesStack.pop(); 614 while (!prefixesStack.isEmpty()) { 615 ByteString newLeft = prefixesStack.pop(); 616 partialString = new RopeByteString(newLeft, partialString); 617 } 618 // We should end up with a RopeByteString since at a minimum we will 619 // create one from concatenating left and right 620 return partialString; 621 } 622 doBalance(ByteString root)623 private void doBalance(ByteString root) { 624 // BAP95: Insert balanced subtrees whole. This means the result might not 625 // be balanced, leading to repeated rebalancings on concatenate. However, 626 // these rebalancings are shallow due to ignoring balanced subtrees, and 627 // relatively few calls to insert() result. 628 if (root.isBalanced()) { 629 insert(root); 630 } else if (root instanceof RopeByteString) { 631 RopeByteString rbs = (RopeByteString) root; 632 doBalance(rbs.left); 633 doBalance(rbs.right); 634 } else { 635 throw new IllegalArgumentException( 636 "Has a new type of ByteString been created? Found " + 637 root.getClass()); 638 } 639 } 640 641 /** 642 * Push a string on the balance stack (BAP95). BAP95 uses an array and 643 * calls the elements in the array 'bins'. We instead use a stack, so the 644 * 'bins' of lengths are represented by differences between the elements of 645 * minLengthByDepth. 646 * 647 * <p>If the length bin for our string, and all shorter length bins, are 648 * empty, we just push it on the stack. Otherwise, we need to start 649 * concatenating, putting the given string in the "middle" and continuing 650 * until we land in an empty length bin that matches the length of our 651 * concatenation. 652 * 653 * @param byteString string to place on the balance stack 654 */ insert(ByteString byteString)655 private void insert(ByteString byteString) { 656 int depthBin = getDepthBinForLength(byteString.size()); 657 int binEnd = minLengthByDepth[depthBin + 1]; 658 659 // BAP95: Concatenate all trees occupying bins representing the length of 660 // our new piece or of shorter pieces, to the extent that is possible. 661 // The goal is to clear the bin which our piece belongs in, but that may 662 // not be entirely possible if there aren't enough longer bins occupied. 663 if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) { 664 prefixesStack.push(byteString); 665 } else { 666 int binStart = minLengthByDepth[depthBin]; 667 668 // Concatenate the subtrees of shorter length 669 ByteString newTree = prefixesStack.pop(); 670 while (!prefixesStack.isEmpty() 671 && prefixesStack.peek().size() < binStart) { 672 ByteString left = prefixesStack.pop(); 673 newTree = new RopeByteString(left, newTree); 674 } 675 676 // Concatenate the given string 677 newTree = new RopeByteString(newTree, byteString); 678 679 // Continue concatenating until we land in an empty bin 680 while (!prefixesStack.isEmpty()) { 681 depthBin = getDepthBinForLength(newTree.size()); 682 binEnd = minLengthByDepth[depthBin + 1]; 683 if (prefixesStack.peek().size() < binEnd) { 684 ByteString left = prefixesStack.pop(); 685 newTree = new RopeByteString(left, newTree); 686 } else { 687 break; 688 } 689 } 690 prefixesStack.push(newTree); 691 } 692 } 693 getDepthBinForLength(int length)694 private int getDepthBinForLength(int length) { 695 int depth = Arrays.binarySearch(minLengthByDepth, length); 696 if (depth < 0) { 697 // It wasn't an exact match, so convert to the index of the containing 698 // fragment, which is one less even than the insertion point. 699 int insertionPoint = -(depth + 1); 700 depth = insertionPoint - 1; 701 } 702 703 return depth; 704 } 705 } 706 707 /** 708 * This class is a continuable tree traversal, which keeps the state 709 * information which would exist on the stack in a recursive traversal instead 710 * on a stack of "Bread Crumbs". The maximum depth of the stack in this 711 * iterator is the same as the depth of the tree being traversed. 712 * 713 * <p>This iterator is used to implement 714 * {@link RopeByteString#equalsFragments(ByteString)}. 715 */ 716 private static class PieceIterator implements Iterator<LiteralByteString> { 717 718 private final Stack<RopeByteString> breadCrumbs = 719 new Stack<RopeByteString>(); 720 private LiteralByteString next; 721 PieceIterator(ByteString root)722 private PieceIterator(ByteString root) { 723 next = getLeafByLeft(root); 724 } 725 getLeafByLeft(ByteString root)726 private LiteralByteString getLeafByLeft(ByteString root) { 727 ByteString pos = root; 728 while (pos instanceof RopeByteString) { 729 RopeByteString rbs = (RopeByteString) pos; 730 breadCrumbs.push(rbs); 731 pos = rbs.left; 732 } 733 return (LiteralByteString) pos; 734 } 735 getNextNonEmptyLeaf()736 private LiteralByteString getNextNonEmptyLeaf() { 737 while (true) { 738 // Almost always, we go through this loop exactly once. However, if 739 // we discover an empty string in the rope, we toss it and try again. 740 if (breadCrumbs.isEmpty()) { 741 return null; 742 } else { 743 LiteralByteString result = getLeafByLeft(breadCrumbs.pop().right); 744 if (!result.isEmpty()) { 745 return result; 746 } 747 } 748 } 749 } 750 hasNext()751 public boolean hasNext() { 752 return next != null; 753 } 754 755 /** 756 * Returns the next item and advances one {@code LiteralByteString}. 757 * 758 * @return next non-empty LiteralByteString or {@code null} 759 */ next()760 public LiteralByteString next() { 761 if (next == null) { 762 throw new NoSuchElementException(); 763 } 764 LiteralByteString result = next; 765 next = getNextNonEmptyLeaf(); 766 return result; 767 } 768 remove()769 public void remove() { 770 throw new UnsupportedOperationException(); 771 } 772 } 773 774 // ================================================================= 775 // ByteIterator 776 777 @Override iterator()778 public ByteIterator iterator() { 779 return new RopeByteIterator(); 780 } 781 782 private class RopeByteIterator implements ByteString.ByteIterator { 783 784 private final PieceIterator pieces; 785 private ByteIterator bytes; 786 int bytesRemaining; 787 RopeByteIterator()788 private RopeByteIterator() { 789 pieces = new PieceIterator(RopeByteString.this); 790 bytes = pieces.next().iterator(); 791 bytesRemaining = size(); 792 } 793 hasNext()794 public boolean hasNext() { 795 return (bytesRemaining > 0); 796 } 797 next()798 public Byte next() { 799 return nextByte(); // Does not instantiate a Byte 800 } 801 nextByte()802 public byte nextByte() { 803 if (!bytes.hasNext()) { 804 bytes = pieces.next().iterator(); 805 } 806 --bytesRemaining; 807 return bytes.nextByte(); 808 } 809 remove()810 public void remove() { 811 throw new UnsupportedOperationException(); 812 } 813 } 814 815 /** 816 * This class is the {@link RopeByteString} equivalent for 817 * {@link ByteArrayInputStream}. 818 */ 819 private class RopeInputStream extends InputStream { 820 // Iterates through the pieces of the rope 821 private PieceIterator pieceIterator; 822 // The current piece 823 private LiteralByteString currentPiece; 824 // The size of the current piece 825 private int currentPieceSize; 826 // The index of the next byte to read in the current piece 827 private int currentPieceIndex; 828 // The offset of the start of the current piece in the rope byte string 829 private int currentPieceOffsetInRope; 830 // Offset in the buffer at which user called mark(); 831 private int mark; 832 RopeInputStream()833 public RopeInputStream() { 834 initialize(); 835 } 836 837 @Override read(byte b[], int offset, int length)838 public int read(byte b[], int offset, int length) { 839 if (b == null) { 840 throw new NullPointerException(); 841 } else if (offset < 0 || length < 0 || length > b.length - offset) { 842 throw new IndexOutOfBoundsException(); 843 } 844 return readSkipInternal(b, offset, length); 845 } 846 847 @Override skip(long length)848 public long skip(long length) { 849 if (length < 0) { 850 throw new IndexOutOfBoundsException(); 851 } else if (length > Integer.MAX_VALUE) { 852 length = Integer.MAX_VALUE; 853 } 854 return readSkipInternal(null, 0, (int) length); 855 } 856 857 /** 858 * Internal implementation of read and skip. If b != null, then read the 859 * next {@code length} bytes into the buffer {@code b} at 860 * offset {@code offset}. If b == null, then skip the next {@code length) 861 * bytes. 862 * <p> 863 * This method assumes that all error checking has already happened. 864 * <p> 865 * Returns the actual number of bytes read or skipped. 866 */ readSkipInternal(byte b[], int offset, int length)867 private int readSkipInternal(byte b[], int offset, int length) { 868 int bytesRemaining = length; 869 while (bytesRemaining > 0) { 870 advanceIfCurrentPieceFullyRead(); 871 if (currentPiece == null) { 872 if (bytesRemaining == length) { 873 // We didn't manage to read anything 874 return -1; 875 } 876 break; 877 } else { 878 // Copy the bytes from this piece. 879 int currentPieceRemaining = currentPieceSize - currentPieceIndex; 880 int count = Math.min(currentPieceRemaining, bytesRemaining); 881 if (b != null) { 882 currentPiece.copyTo(b, currentPieceIndex, offset, count); 883 offset += count; 884 } 885 currentPieceIndex += count; 886 bytesRemaining -= count; 887 } 888 } 889 // Return the number of bytes read. 890 return length - bytesRemaining; 891 } 892 893 @Override read()894 public int read() throws IOException { 895 advanceIfCurrentPieceFullyRead(); 896 if (currentPiece == null) { 897 return -1; 898 } else { 899 return currentPiece.byteAt(currentPieceIndex++) & 0xFF; 900 } 901 } 902 903 @Override available()904 public int available() throws IOException { 905 int bytesRead = currentPieceOffsetInRope + currentPieceIndex; 906 return RopeByteString.this.size() - bytesRead; 907 } 908 909 @Override markSupported()910 public boolean markSupported() { 911 return true; 912 } 913 914 @Override mark(int readAheadLimit)915 public void mark(int readAheadLimit) { 916 // Set the mark to our position in the byte string 917 mark = currentPieceOffsetInRope + currentPieceIndex; 918 } 919 920 @Override reset()921 public synchronized void reset() { 922 // Just reinitialize and skip the specified number of bytes. 923 initialize(); 924 readSkipInternal(null, 0, mark); 925 } 926 927 /** Common initialization code used by both the constructor and reset() */ initialize()928 private void initialize() { 929 pieceIterator = new PieceIterator(RopeByteString.this); 930 currentPiece = pieceIterator.next(); 931 currentPieceSize = currentPiece.size(); 932 currentPieceIndex = 0; 933 currentPieceOffsetInRope = 0; 934 } 935 936 /** 937 * Skips to the next piece if we have read all the data in the current 938 * piece. Sets currentPiece to null if we have reached the end of the 939 * input. 940 */ advanceIfCurrentPieceFullyRead()941 private void advanceIfCurrentPieceFullyRead() { 942 if (currentPiece != null && currentPieceIndex == currentPieceSize) { 943 // Generally, we can only go through this loop at most once, since 944 // empty strings can't end up in a rope. But better to test. 945 currentPieceOffsetInRope += currentPieceSize; 946 currentPieceIndex = 0; 947 if (pieceIterator.hasNext()) { 948 currentPiece = pieceIterator.next(); 949 currentPieceSize = currentPiece.size(); 950 } else { 951 currentPiece = null; 952 currentPieceSize = 0; 953 } 954 } 955 } 956 } 957 } 958