1 /* 2 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.util.concurrent.CountedCompleter; 29 import java.util.concurrent.RecursiveTask; 30 31 /** 32 * This class implements powerful and fully optimized versions, both 33 * sequential and parallel, of the Dual-Pivot Quicksort algorithm by 34 * Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm 35 * offers O(n log(n)) performance on all data sets, and is typically 36 * faster than traditional (one-pivot) Quicksort implementations. 37 * 38 * There are also additional algorithms, invoked from the Dual-Pivot 39 * Quicksort, such as mixed insertion sort, merging of runs and heap 40 * sort, counting sort and parallel merge sort. 41 * 42 * @author Vladimir Yaroslavskiy 43 * @author Jon Bentley 44 * @author Josh Bloch 45 * @author Doug Lea 46 * 47 * @version 2018.08.18 48 * 49 * @since 1.7 * 14 50 */ 51 final class DualPivotQuicksort { 52 53 /** 54 * Prevents instantiation. 55 */ DualPivotQuicksort()56 private DualPivotQuicksort() {} 57 58 /** 59 * Max array size to use mixed insertion sort. 60 */ 61 private static final int MAX_MIXED_INSERTION_SORT_SIZE = 65; 62 63 /** 64 * Max array size to use insertion sort. 65 */ 66 private static final int MAX_INSERTION_SORT_SIZE = 44; 67 68 /** 69 * Min array size to perform sorting in parallel. 70 */ 71 private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10; 72 73 /** 74 * Min array size to try merging of runs. 75 */ 76 private static final int MIN_TRY_MERGE_SIZE = 4 << 10; 77 78 /** 79 * Min size of the first run to continue with scanning. 80 */ 81 private static final int MIN_FIRST_RUN_SIZE = 16; 82 83 /** 84 * Min factor for the first runs to continue scanning. 85 */ 86 private static final int MIN_FIRST_RUNS_FACTOR = 7; 87 88 /** 89 * Max capacity of the index array for tracking runs. 90 */ 91 private static final int MAX_RUN_CAPACITY = 5 << 10; 92 93 /** 94 * Min number of runs, required by parallel merging. 95 */ 96 private static final int MIN_RUN_COUNT = 4; 97 98 /** 99 * Min array size to use parallel merging of parts. 100 */ 101 private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10; 102 103 /** 104 * Min size of a byte array to use counting sort. 105 */ 106 private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64; 107 108 /** 109 * Min size of a short or char array to use counting sort. 110 */ 111 private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750; 112 113 /** 114 * Threshold of mixed insertion sort is incremented by this value. 115 */ 116 private static final int DELTA = 3 << 1; 117 118 /** 119 * Max recursive partitioning depth before using heap sort. 120 */ 121 private static final int MAX_RECURSION_DEPTH = 64 * DELTA; 122 123 /** 124 * Calculates the double depth of parallel merging. 125 * Depth is negative, if tasks split before sorting. 126 * 127 * @param parallelism the parallelism level 128 * @param size the target size 129 * @return the depth of parallel merging 130 */ getDepth(int parallelism, int size)131 private static int getDepth(int parallelism, int size) { 132 int depth = 0; 133 134 while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) { 135 depth -= 2; 136 } 137 return depth; 138 } 139 140 /** 141 * Sorts the specified range of the array using parallel merge 142 * sort and/or Dual-Pivot Quicksort. 143 * 144 * To balance the faster splitting and parallelism of merge sort 145 * with the faster element partitioning of Quicksort, ranges are 146 * subdivided in tiers such that, if there is enough parallelism, 147 * the four-way parallel merge is started, still ensuring enough 148 * parallelism to process the partitions. 149 * 150 * @param a the array to be sorted 151 * @param parallelism the parallelism level 152 * @param low the index of the first element, inclusive, to be sorted 153 * @param high the index of the last element, exclusive, to be sorted 154 */ sort(int[] a, int parallelism, int low, int high)155 static void sort(int[] a, int parallelism, int low, int high) { 156 int size = high - low; 157 158 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 159 int depth = getDepth(parallelism, size >> 12); 160 int[] b = depth == 0 ? null : new int[size]; 161 new Sorter(null, a, b, low, size, low, depth).invoke(); 162 } else { 163 sort(null, a, 0, low, high); 164 } 165 } 166 167 /** 168 * Sorts the specified array using the Dual-Pivot Quicksort and/or 169 * other sorts in special-cases, possibly with parallel partitions. 170 * 171 * @param sorter parallel context 172 * @param a the array to be sorted 173 * @param bits the combination of recursion depth and bit flag, where 174 * the right bit "0" indicates that array is the leftmost part 175 * @param low the index of the first element, inclusive, to be sorted 176 * @param high the index of the last element, exclusive, to be sorted 177 */ sort(Sorter sorter, int[] a, int bits, int low, int high)178 static void sort(Sorter sorter, int[] a, int bits, int low, int high) { 179 while (true) { 180 int end = high - 1, size = high - low; 181 182 /* 183 * Run mixed insertion sort on small non-leftmost parts. 184 */ 185 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { 186 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 187 return; 188 } 189 190 /* 191 * Invoke insertion sort on small leftmost part. 192 */ 193 if (size < MAX_INSERTION_SORT_SIZE) { 194 insertionSort(a, low, high); 195 return; 196 } 197 198 /* 199 * Check if the whole array or large non-leftmost 200 * parts are nearly sorted and then merge runs. 201 */ 202 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 203 && tryMergeRuns(sorter, a, low, size)) { 204 return; 205 } 206 207 /* 208 * Switch to heap sort if execution 209 * time is becoming quadratic. 210 */ 211 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { 212 heapSort(a, low, high); 213 return; 214 } 215 216 /* 217 * Use an inexpensive approximation of the golden ratio 218 * to select five sample elements and determine pivots. 219 */ 220 int step = (size >> 3) * 3 + 3; 221 222 /* 223 * Five elements around (and including) the central element 224 * will be used for pivot selection as described below. The 225 * unequal choice of spacing these elements was empirically 226 * determined to work well on a wide variety of inputs. 227 */ 228 int e1 = low + step; 229 int e5 = end - step; 230 int e3 = (e1 + e5) >>> 1; 231 int e2 = (e1 + e3) >>> 1; 232 int e4 = (e3 + e5) >>> 1; 233 int a3 = a[e3]; 234 235 /* 236 * Sort these elements in place by the combination 237 * of 4-element sorting network and insertion sort. 238 * 239 * 5 ------o-----------o------------ 240 * | | 241 * 4 ------|-----o-----o-----o------ 242 * | | | 243 * 2 ------o-----|-----o-----o------ 244 * | | 245 * 1 ------------o-----o------------ 246 */ 247 if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 248 if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 249 if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 250 if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 251 if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 252 253 if (a3 < a[e2]) { 254 if (a3 < a[e1]) { 255 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 256 } else { 257 a[e3] = a[e2]; a[e2] = a3; 258 } 259 } else if (a3 > a[e4]) { 260 if (a3 > a[e5]) { 261 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 262 } else { 263 a[e3] = a[e4]; a[e4] = a3; 264 } 265 } 266 267 // Pointers 268 int lower = low; // The index of the last element of the left part 269 int upper = end; // The index of the first element of the right part 270 271 /* 272 * Partitioning with 2 pivots in case of different elements. 273 */ 274 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 275 276 /* 277 * Use the first and fifth of the five sorted elements as 278 * the pivots. These values are inexpensive approximation 279 * of tertiles. Note, that pivot1 < pivot2. 280 */ 281 int pivot1 = a[e1]; 282 int pivot2 = a[e5]; 283 284 /* 285 * The first and the last elements to be sorted are moved 286 * to the locations formerly occupied by the pivots. When 287 * partitioning is completed, the pivots are swapped back 288 * into their final positions, and excluded from the next 289 * subsequent sorting. 290 */ 291 a[e1] = a[lower]; 292 a[e5] = a[upper]; 293 294 /* 295 * Skip elements, which are less or greater than the pivots. 296 */ 297 while (a[++lower] < pivot1); 298 while (a[--upper] > pivot2); 299 300 /* 301 * Backward 3-interval partitioning 302 * 303 * left part central part right part 304 * +------------------------------------------------------------+ 305 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 306 * +------------------------------------------------------------+ 307 * ^ ^ ^ 308 * | | | 309 * lower k upper 310 * 311 * Invariants: 312 * 313 * all in (low, lower] < pivot1 314 * pivot1 <= all in (k, upper) <= pivot2 315 * all in [upper, end) > pivot2 316 * 317 * Pointer k is the last index of ?-part 318 */ 319 for (int unused = --lower, k = ++upper; --k > lower; ) { 320 int ak = a[k]; 321 322 if (ak < pivot1) { // Move a[k] to the left side 323 while (lower < k) { 324 if (a[++lower] >= pivot1) { 325 if (a[lower] > pivot2) { 326 a[k] = a[--upper]; 327 a[upper] = a[lower]; 328 } else { 329 a[k] = a[lower]; 330 } 331 a[lower] = ak; 332 break; 333 } 334 } 335 } else if (ak > pivot2) { // Move a[k] to the right side 336 a[k] = a[--upper]; 337 a[upper] = ak; 338 } 339 } 340 341 /* 342 * Swap the pivots into their final positions. 343 */ 344 a[low] = a[lower]; a[lower] = pivot1; 345 a[end] = a[upper]; a[upper] = pivot2; 346 347 /* 348 * Sort non-left parts recursively (possibly in parallel), 349 * excluding known pivots. 350 */ 351 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 352 sorter.forkSorter(bits | 1, lower + 1, upper); 353 sorter.forkSorter(bits | 1, upper + 1, high); 354 } else { 355 sort(sorter, a, bits | 1, lower + 1, upper); 356 sort(sorter, a, bits | 1, upper + 1, high); 357 } 358 359 } else { // Use single pivot in case of many equal elements 360 361 /* 362 * Use the third of the five sorted elements as the pivot. 363 * This value is inexpensive approximation of the median. 364 */ 365 int pivot = a[e3]; 366 367 /* 368 * The first element to be sorted is moved to the 369 * location formerly occupied by the pivot. After 370 * completion of partitioning the pivot is swapped 371 * back into its final position, and excluded from 372 * the next subsequent sorting. 373 */ 374 a[e3] = a[lower]; 375 376 /* 377 * Traditional 3-way (Dutch National Flag) partitioning 378 * 379 * left part central part right part 380 * +------------------------------------------------------+ 381 * | < pivot | ? | == pivot | > pivot | 382 * +------------------------------------------------------+ 383 * ^ ^ ^ 384 * | | | 385 * lower k upper 386 * 387 * Invariants: 388 * 389 * all in (low, lower] < pivot 390 * all in (k, upper) == pivot 391 * all in [upper, end] > pivot 392 * 393 * Pointer k is the last index of ?-part 394 */ 395 for (int k = ++upper; --k > lower; ) { 396 int ak = a[k]; 397 398 if (ak != pivot) { 399 a[k] = pivot; 400 401 if (ak < pivot) { // Move a[k] to the left side 402 while (a[++lower] < pivot); 403 404 if (a[lower] > pivot) { 405 a[--upper] = a[lower]; 406 } 407 a[lower] = ak; 408 } else { // ak > pivot - Move a[k] to the right side 409 a[--upper] = ak; 410 } 411 } 412 } 413 414 /* 415 * Swap the pivot into its final position. 416 */ 417 a[low] = a[lower]; a[lower] = pivot; 418 419 /* 420 * Sort the right part (possibly in parallel), excluding 421 * known pivot. All elements from the central part are 422 * equal and therefore already sorted. 423 */ 424 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 425 sorter.forkSorter(bits | 1, upper, high); 426 } else { 427 sort(sorter, a, bits | 1, upper, high); 428 } 429 } 430 high = lower; // Iterate along the left part 431 } 432 } 433 434 /** 435 * Sorts the specified range of the array using mixed insertion sort. 436 * 437 * Mixed insertion sort is combination of simple insertion sort, 438 * pin insertion sort and pair insertion sort. 439 * 440 * In the context of Dual-Pivot Quicksort, the pivot element 441 * from the left part plays the role of sentinel, because it 442 * is less than any elements from the given part. Therefore, 443 * expensive check of the left range can be skipped on each 444 * iteration unless it is the leftmost call. 445 * 446 * @param a the array to be sorted 447 * @param low the index of the first element, inclusive, to be sorted 448 * @param end the index of the last element for simple insertion sort 449 * @param high the index of the last element, exclusive, to be sorted 450 */ mixedInsertionSort(int[] a, int low, int end, int high)451 private static void mixedInsertionSort(int[] a, int low, int end, int high) { 452 if (end == high) { 453 454 /* 455 * Invoke simple insertion sort on tiny array. 456 */ 457 for (int i; ++low < end; ) { 458 int ai = a[i = low]; 459 460 while (ai < a[--i]) { 461 a[i + 1] = a[i]; 462 } 463 a[i + 1] = ai; 464 } 465 } else { 466 467 /* 468 * Start with pin insertion sort on small part. 469 * 470 * Pin insertion sort is extended simple insertion sort. 471 * The main idea of this sort is to put elements larger 472 * than an element called pin to the end of array (the 473 * proper area for such elements). It avoids expensive 474 * movements of these elements through the whole array. 475 */ 476 int pin = a[end]; 477 478 for (int i, p = high; ++low < end; ) { 479 int ai = a[i = low]; 480 481 if (ai < a[i - 1]) { // Small element 482 483 /* 484 * Insert small element into sorted part. 485 */ 486 a[i] = a[--i]; 487 488 while (ai < a[--i]) { 489 a[i + 1] = a[i]; 490 } 491 a[i + 1] = ai; 492 493 } else if (p > i && ai > pin) { // Large element 494 495 /* 496 * Find element smaller than pin. 497 */ 498 while (a[--p] > pin); 499 500 /* 501 * Swap it with large element. 502 */ 503 if (p > i) { 504 ai = a[p]; 505 a[p] = a[i]; 506 } 507 508 /* 509 * Insert small element into sorted part. 510 */ 511 while (ai < a[--i]) { 512 a[i + 1] = a[i]; 513 } 514 a[i + 1] = ai; 515 } 516 } 517 518 /* 519 * Continue with pair insertion sort on remain part. 520 */ 521 for (int i; low < high; ++low) { 522 int a1 = a[i = low], a2 = a[++low]; 523 524 /* 525 * Insert two elements per iteration: at first, insert the 526 * larger element and then insert the smaller element, but 527 * from the position where the larger element was inserted. 528 */ 529 if (a1 > a2) { 530 531 while (a1 < a[--i]) { 532 a[i + 2] = a[i]; 533 } 534 a[++i + 1] = a1; 535 536 while (a2 < a[--i]) { 537 a[i + 1] = a[i]; 538 } 539 a[i + 1] = a2; 540 541 } else if (a1 < a[i - 1]) { 542 543 while (a2 < a[--i]) { 544 a[i + 2] = a[i]; 545 } 546 a[++i + 1] = a2; 547 548 while (a1 < a[--i]) { 549 a[i + 1] = a[i]; 550 } 551 a[i + 1] = a1; 552 } 553 } 554 } 555 } 556 557 /** 558 * Sorts the specified range of the array using insertion sort. 559 * 560 * @param a the array to be sorted 561 * @param low the index of the first element, inclusive, to be sorted 562 * @param high the index of the last element, exclusive, to be sorted 563 */ insertionSort(int[] a, int low, int high)564 private static void insertionSort(int[] a, int low, int high) { 565 for (int i, k = low; ++k < high; ) { 566 int ai = a[i = k]; 567 568 if (ai < a[i - 1]) { 569 while (--i >= low && ai < a[i]) { 570 a[i + 1] = a[i]; 571 } 572 a[i + 1] = ai; 573 } 574 } 575 } 576 577 /** 578 * Sorts the specified range of the array using heap sort. 579 * 580 * @param a the array to be sorted 581 * @param low the index of the first element, inclusive, to be sorted 582 * @param high the index of the last element, exclusive, to be sorted 583 */ heapSort(int[] a, int low, int high)584 private static void heapSort(int[] a, int low, int high) { 585 for (int k = (low + high) >>> 1; k > low; ) { 586 pushDown(a, --k, a[k], low, high); 587 } 588 while (--high > low) { 589 int max = a[low]; 590 pushDown(a, low, a[high], low, high); 591 a[high] = max; 592 } 593 } 594 595 /** 596 * Pushes specified element down during heap sort. 597 * 598 * @param a the given array 599 * @param p the start index 600 * @param value the given element 601 * @param low the index of the first element, inclusive, to be sorted 602 * @param high the index of the last element, exclusive, to be sorted 603 */ pushDown(int[] a, int p, int value, int low, int high)604 private static void pushDown(int[] a, int p, int value, int low, int high) { 605 for (int k ;; a[p] = a[p = k]) { 606 k = (p << 1) - low + 2; // Index of the right child 607 608 if (k > high) { 609 break; 610 } 611 if (k == high || a[k] < a[k - 1]) { 612 --k; 613 } 614 if (a[k] <= value) { 615 break; 616 } 617 } 618 a[p] = value; 619 } 620 621 /** 622 * Tries to sort the specified range of the array. 623 * 624 * @param sorter parallel context 625 * @param a the array to be sorted 626 * @param low the index of the first element to be sorted 627 * @param size the array size 628 * @return true if finally sorted, false otherwise 629 */ tryMergeRuns(Sorter sorter, int[] a, int low, int size)630 private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) { 631 632 /* 633 * The run array is constructed only if initial runs are 634 * long enough to continue, run[i] then holds start index 635 * of the i-th sequence of elements in non-descending order. 636 */ 637 int[] run = null; 638 int high = low + size; 639 int count = 1, last = low; 640 641 /* 642 * Identify all possible runs. 643 */ 644 for (int k = low + 1; k < high; ) { 645 646 /* 647 * Find the end index of the current run. 648 */ 649 if (a[k - 1] < a[k]) { 650 651 // Identify ascending sequence 652 while (++k < high && a[k - 1] <= a[k]); 653 654 } else if (a[k - 1] > a[k]) { 655 656 // Identify descending sequence 657 while (++k < high && a[k - 1] >= a[k]); 658 659 // Reverse into ascending order 660 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 661 int ai = a[i]; a[i] = a[j]; a[j] = ai; 662 } 663 } else { // Identify constant sequence 664 for (int ak = a[k]; ++k < high && ak == a[k]; ); 665 666 if (k < high) { 667 continue; 668 } 669 } 670 671 /* 672 * Check special cases. 673 */ 674 if (run == null) { 675 if (k == high) { 676 677 /* 678 * The array is monotonous sequence, 679 * and therefore already sorted. 680 */ 681 return true; 682 } 683 684 if (k - low < MIN_FIRST_RUN_SIZE) { 685 686 /* 687 * The first run is too small 688 * to proceed with scanning. 689 */ 690 return false; 691 } 692 693 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 694 run[0] = low; 695 696 } else if (a[last - 1] > a[last]) { 697 698 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 699 700 /* 701 * The first runs are not long 702 * enough to continue scanning. 703 */ 704 return false; 705 } 706 707 if (++count == MAX_RUN_CAPACITY) { 708 709 /* 710 * Array is not highly structured. 711 */ 712 return false; 713 } 714 715 if (count == run.length) { 716 717 /* 718 * Increase capacity of index array. 719 */ 720 run = Arrays.copyOf(run, count << 1); 721 } 722 } 723 run[count] = (last = k); 724 } 725 726 /* 727 * Merge runs of highly structured array. 728 */ 729 if (count > 1) { 730 int[] b; int offset = low; 731 732 if (sorter == null || (b = (int[]) sorter.b) == null) { 733 b = new int[size]; 734 } else { 735 offset = sorter.offset; 736 } 737 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 738 } 739 return true; 740 } 741 742 /** 743 * Merges the specified runs. 744 * 745 * @param a the source array 746 * @param b the temporary buffer used in merging 747 * @param offset the start index in the source, inclusive 748 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 749 * @param parallel indicates whether merging is performed in parallel 750 * @param run the start indexes of the runs, inclusive 751 * @param lo the start index of the first run, inclusive 752 * @param hi the start index of the last run, inclusive 753 * @return the destination where runs are merged 754 */ mergeRuns(int[] a, int[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)755 private static int[] mergeRuns(int[] a, int[] b, int offset, 756 int aim, boolean parallel, int[] run, int lo, int hi) { 757 758 if (hi - lo == 1) { 759 if (aim >= 0) { 760 return a; 761 } 762 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 763 b[--j] = a[--i] 764 ); 765 return b; 766 } 767 768 /* 769 * Split into approximately equal parts. 770 */ 771 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 772 while (run[++mi + 1] <= rmi); 773 774 /* 775 * Merge the left and right parts. 776 */ 777 int[] a1, a2; 778 779 if (parallel && hi - lo > MIN_RUN_COUNT) { 780 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 781 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 782 a2 = (int[]) merger.getDestination(); 783 } else { 784 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 785 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 786 } 787 788 int[] dst = a1 == a ? b : a; 789 790 int k = a1 == a ? run[lo] - offset : run[lo]; 791 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 792 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 793 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 794 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 795 796 if (parallel) { 797 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 798 } else { 799 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 800 } 801 return dst; 802 } 803 804 /** 805 * Merges the sorted parts. 806 * 807 * @param merger parallel context 808 * @param dst the destination where parts are merged 809 * @param k the start index of the destination, inclusive 810 * @param a1 the first part 811 * @param lo1 the start index of the first part, inclusive 812 * @param hi1 the end index of the first part, exclusive 813 * @param a2 the second part 814 * @param lo2 the start index of the second part, inclusive 815 * @param hi2 the end index of the second part, exclusive 816 */ mergeParts(Merger merger, int[] dst, int k, int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2)817 private static void mergeParts(Merger merger, int[] dst, int k, 818 int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) { 819 820 if (merger != null && a1 == a2) { 821 822 while (true) { 823 824 /* 825 * The first part must be larger. 826 */ 827 if (hi1 - lo1 < hi2 - lo2) { 828 int lo = lo1; lo1 = lo2; lo2 = lo; 829 int hi = hi1; hi1 = hi2; hi2 = hi; 830 } 831 832 /* 833 * Small parts will be merged sequentially. 834 */ 835 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 836 break; 837 } 838 839 /* 840 * Find the median of the larger part. 841 */ 842 int mi1 = (lo1 + hi1) >>> 1; 843 int key = a1[mi1]; 844 int mi2 = hi2; 845 846 /* 847 * Partition the smaller part. 848 */ 849 for (int loo = lo2; loo < mi2; ) { 850 int t = (loo + mi2) >>> 1; 851 852 if (key > a2[t]) { 853 loo = t + 1; 854 } else { 855 mi2 = t; 856 } 857 } 858 859 int d = mi2 - lo2 + mi1 - lo1; 860 861 /* 862 * Merge the right sub-parts in parallel. 863 */ 864 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 865 866 /* 867 * Process the sub-left parts. 868 */ 869 hi1 = mi1; 870 hi2 = mi2; 871 } 872 } 873 874 /* 875 * Merge small parts sequentially. 876 */ 877 while (lo1 < hi1 && lo2 < hi2) { 878 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 879 } 880 if (dst != a1 || k < lo1) { 881 while (lo1 < hi1) { 882 dst[k++] = a1[lo1++]; 883 } 884 } 885 if (dst != a2 || k < lo2) { 886 while (lo2 < hi2) { 887 dst[k++] = a2[lo2++]; 888 } 889 } 890 } 891 892 // [long] 893 894 /** 895 * Sorts the specified range of the array using parallel merge 896 * sort and/or Dual-Pivot Quicksort. 897 * 898 * To balance the faster splitting and parallelism of merge sort 899 * with the faster element partitioning of Quicksort, ranges are 900 * subdivided in tiers such that, if there is enough parallelism, 901 * the four-way parallel merge is started, still ensuring enough 902 * parallelism to process the partitions. 903 * 904 * @param a the array to be sorted 905 * @param parallelism the parallelism level 906 * @param low the index of the first element, inclusive, to be sorted 907 * @param high the index of the last element, exclusive, to be sorted 908 */ 909 static void sort(long[] a, int parallelism, int low, int high) { 910 int size = high - low; 911 912 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 913 int depth = getDepth(parallelism, size >> 12); 914 long[] b = depth == 0 ? null : new long[size]; 915 new Sorter(null, a, b, low, size, low, depth).invoke(); 916 } else { 917 sort(null, a, 0, low, high); 918 } 919 } 920 921 /** 922 * Sorts the specified array using the Dual-Pivot Quicksort and/or 923 * other sorts in special-cases, possibly with parallel partitions. 924 * 925 * @param sorter parallel context 926 * @param a the array to be sorted 927 * @param bits the combination of recursion depth and bit flag, where 928 * the right bit "0" indicates that array is the leftmost part 929 * @param low the index of the first element, inclusive, to be sorted 930 * @param high the index of the last element, exclusive, to be sorted 931 */ 932 static void sort(Sorter sorter, long[] a, int bits, int low, int high) { 933 while (true) { 934 int end = high - 1, size = high - low; 935 936 /* 937 * Run mixed insertion sort on small non-leftmost parts. 938 */ 939 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { 940 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 941 return; 942 } 943 944 /* 945 * Invoke insertion sort on small leftmost part. 946 */ 947 if (size < MAX_INSERTION_SORT_SIZE) { 948 insertionSort(a, low, high); 949 return; 950 } 951 952 /* 953 * Check if the whole array or large non-leftmost 954 * parts are nearly sorted and then merge runs. 955 */ 956 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 957 && tryMergeRuns(sorter, a, low, size)) { 958 return; 959 } 960 961 /* 962 * Switch to heap sort if execution 963 * time is becoming quadratic. 964 */ 965 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { 966 heapSort(a, low, high); 967 return; 968 } 969 970 /* 971 * Use an inexpensive approximation of the golden ratio 972 * to select five sample elements and determine pivots. 973 */ 974 int step = (size >> 3) * 3 + 3; 975 976 /* 977 * Five elements around (and including) the central element 978 * will be used for pivot selection as described below. The 979 * unequal choice of spacing these elements was empirically 980 * determined to work well on a wide variety of inputs. 981 */ 982 int e1 = low + step; 983 int e5 = end - step; 984 int e3 = (e1 + e5) >>> 1; 985 int e2 = (e1 + e3) >>> 1; 986 int e4 = (e3 + e5) >>> 1; 987 long a3 = a[e3]; 988 989 /* 990 * Sort these elements in place by the combination 991 * of 4-element sorting network and insertion sort. 992 * 993 * 5 ------o-----------o------------ 994 * | | 995 * 4 ------|-----o-----o-----o------ 996 * | | | 997 * 2 ------o-----|-----o-----o------ 998 * | | 999 * 1 ------------o-----o------------ 1000 */ 1001 if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 1002 if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 1003 if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 1004 if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 1005 if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 1006 1007 if (a3 < a[e2]) { 1008 if (a3 < a[e1]) { 1009 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 1010 } else { 1011 a[e3] = a[e2]; a[e2] = a3; 1012 } 1013 } else if (a3 > a[e4]) { 1014 if (a3 > a[e5]) { 1015 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 1016 } else { 1017 a[e3] = a[e4]; a[e4] = a3; 1018 } 1019 } 1020 1021 // Pointers 1022 int lower = low; // The index of the last element of the left part 1023 int upper = end; // The index of the first element of the right part 1024 1025 /* 1026 * Partitioning with 2 pivots in case of different elements. 1027 */ 1028 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 1029 1030 /* 1031 * Use the first and fifth of the five sorted elements as 1032 * the pivots. These values are inexpensive approximation 1033 * of tertiles. Note, that pivot1 < pivot2. 1034 */ 1035 long pivot1 = a[e1]; 1036 long pivot2 = a[e5]; 1037 1038 /* 1039 * The first and the last elements to be sorted are moved 1040 * to the locations formerly occupied by the pivots. When 1041 * partitioning is completed, the pivots are swapped back 1042 * into their final positions, and excluded from the next 1043 * subsequent sorting. 1044 */ 1045 a[e1] = a[lower]; 1046 a[e5] = a[upper]; 1047 1048 /* 1049 * Skip elements, which are less or greater than the pivots. 1050 */ 1051 while (a[++lower] < pivot1); 1052 while (a[--upper] > pivot2); 1053 1054 /* 1055 * Backward 3-interval partitioning 1056 * 1057 * left part central part right part 1058 * +------------------------------------------------------------+ 1059 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 1060 * +------------------------------------------------------------+ 1061 * ^ ^ ^ 1062 * | | | 1063 * lower k upper 1064 * 1065 * Invariants: 1066 * 1067 * all in (low, lower] < pivot1 1068 * pivot1 <= all in (k, upper) <= pivot2 1069 * all in [upper, end) > pivot2 1070 * 1071 * Pointer k is the last index of ?-part 1072 */ 1073 for (int unused = --lower, k = ++upper; --k > lower; ) { 1074 long ak = a[k]; 1075 1076 if (ak < pivot1) { // Move a[k] to the left side 1077 while (lower < k) { 1078 if (a[++lower] >= pivot1) { 1079 if (a[lower] > pivot2) { 1080 a[k] = a[--upper]; 1081 a[upper] = a[lower]; 1082 } else { 1083 a[k] = a[lower]; 1084 } 1085 a[lower] = ak; 1086 break; 1087 } 1088 } 1089 } else if (ak > pivot2) { // Move a[k] to the right side 1090 a[k] = a[--upper]; 1091 a[upper] = ak; 1092 } 1093 } 1094 1095 /* 1096 * Swap the pivots into their final positions. 1097 */ 1098 a[low] = a[lower]; a[lower] = pivot1; 1099 a[end] = a[upper]; a[upper] = pivot2; 1100 1101 /* 1102 * Sort non-left parts recursively (possibly in parallel), 1103 * excluding known pivots. 1104 */ 1105 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 1106 sorter.forkSorter(bits | 1, lower + 1, upper); 1107 sorter.forkSorter(bits | 1, upper + 1, high); 1108 } else { 1109 sort(sorter, a, bits | 1, lower + 1, upper); 1110 sort(sorter, a, bits | 1, upper + 1, high); 1111 } 1112 1113 } else { // Use single pivot in case of many equal elements 1114 1115 /* 1116 * Use the third of the five sorted elements as the pivot. 1117 * This value is inexpensive approximation of the median. 1118 */ 1119 long pivot = a[e3]; 1120 1121 /* 1122 * The first element to be sorted is moved to the 1123 * location formerly occupied by the pivot. After 1124 * completion of partitioning the pivot is swapped 1125 * back into its final position, and excluded from 1126 * the next subsequent sorting. 1127 */ 1128 a[e3] = a[lower]; 1129 1130 /* 1131 * Traditional 3-way (Dutch National Flag) partitioning 1132 * 1133 * left part central part right part 1134 * +------------------------------------------------------+ 1135 * | < pivot | ? | == pivot | > pivot | 1136 * +------------------------------------------------------+ 1137 * ^ ^ ^ 1138 * | | | 1139 * lower k upper 1140 * 1141 * Invariants: 1142 * 1143 * all in (low, lower] < pivot 1144 * all in (k, upper) == pivot 1145 * all in [upper, end] > pivot 1146 * 1147 * Pointer k is the last index of ?-part 1148 */ 1149 for (int k = ++upper; --k > lower; ) { 1150 long ak = a[k]; 1151 1152 if (ak != pivot) { 1153 a[k] = pivot; 1154 1155 if (ak < pivot) { // Move a[k] to the left side 1156 while (a[++lower] < pivot); 1157 1158 if (a[lower] > pivot) { 1159 a[--upper] = a[lower]; 1160 } 1161 a[lower] = ak; 1162 } else { // ak > pivot - Move a[k] to the right side 1163 a[--upper] = ak; 1164 } 1165 } 1166 } 1167 1168 /* 1169 * Swap the pivot into its final position. 1170 */ 1171 a[low] = a[lower]; a[lower] = pivot; 1172 1173 /* 1174 * Sort the right part (possibly in parallel), excluding 1175 * known pivot. All elements from the central part are 1176 * equal and therefore already sorted. 1177 */ 1178 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 1179 sorter.forkSorter(bits | 1, upper, high); 1180 } else { 1181 sort(sorter, a, bits | 1, upper, high); 1182 } 1183 } 1184 high = lower; // Iterate along the left part 1185 } 1186 } 1187 1188 /** 1189 * Sorts the specified range of the array using mixed insertion sort. 1190 * 1191 * Mixed insertion sort is combination of simple insertion sort, 1192 * pin insertion sort and pair insertion sort. 1193 * 1194 * In the context of Dual-Pivot Quicksort, the pivot element 1195 * from the left part plays the role of sentinel, because it 1196 * is less than any elements from the given part. Therefore, 1197 * expensive check of the left range can be skipped on each 1198 * iteration unless it is the leftmost call. 1199 * 1200 * @param a the array to be sorted 1201 * @param low the index of the first element, inclusive, to be sorted 1202 * @param end the index of the last element for simple insertion sort 1203 * @param high the index of the last element, exclusive, to be sorted 1204 */ mixedInsertionSort(long[] a, int low, int end, int high)1205 private static void mixedInsertionSort(long[] a, int low, int end, int high) { 1206 if (end == high) { 1207 1208 /* 1209 * Invoke simple insertion sort on tiny array. 1210 */ 1211 for (int i; ++low < end; ) { 1212 long ai = a[i = low]; 1213 1214 while (ai < a[--i]) { 1215 a[i + 1] = a[i]; 1216 } 1217 a[i + 1] = ai; 1218 } 1219 } else { 1220 1221 /* 1222 * Start with pin insertion sort on small part. 1223 * 1224 * Pin insertion sort is extended simple insertion sort. 1225 * The main idea of this sort is to put elements larger 1226 * than an element called pin to the end of array (the 1227 * proper area for such elements). It avoids expensive 1228 * movements of these elements through the whole array. 1229 */ 1230 long pin = a[end]; 1231 1232 for (int i, p = high; ++low < end; ) { 1233 long ai = a[i = low]; 1234 1235 if (ai < a[i - 1]) { // Small element 1236 1237 /* 1238 * Insert small element into sorted part. 1239 */ 1240 a[i] = a[--i]; 1241 1242 while (ai < a[--i]) { 1243 a[i + 1] = a[i]; 1244 } 1245 a[i + 1] = ai; 1246 1247 } else if (p > i && ai > pin) { // Large element 1248 1249 /* 1250 * Find element smaller than pin. 1251 */ 1252 while (a[--p] > pin); 1253 1254 /* 1255 * Swap it with large element. 1256 */ 1257 if (p > i) { 1258 ai = a[p]; 1259 a[p] = a[i]; 1260 } 1261 1262 /* 1263 * Insert small element into sorted part. 1264 */ 1265 while (ai < a[--i]) { 1266 a[i + 1] = a[i]; 1267 } 1268 a[i + 1] = ai; 1269 } 1270 } 1271 1272 /* 1273 * Continue with pair insertion sort on remain part. 1274 */ 1275 for (int i; low < high; ++low) { 1276 long a1 = a[i = low], a2 = a[++low]; 1277 1278 /* 1279 * Insert two elements per iteration: at first, insert the 1280 * larger element and then insert the smaller element, but 1281 * from the position where the larger element was inserted. 1282 */ 1283 if (a1 > a2) { 1284 1285 while (a1 < a[--i]) { 1286 a[i + 2] = a[i]; 1287 } 1288 a[++i + 1] = a1; 1289 1290 while (a2 < a[--i]) { 1291 a[i + 1] = a[i]; 1292 } 1293 a[i + 1] = a2; 1294 1295 } else if (a1 < a[i - 1]) { 1296 1297 while (a2 < a[--i]) { 1298 a[i + 2] = a[i]; 1299 } 1300 a[++i + 1] = a2; 1301 1302 while (a1 < a[--i]) { 1303 a[i + 1] = a[i]; 1304 } 1305 a[i + 1] = a1; 1306 } 1307 } 1308 } 1309 } 1310 1311 /** 1312 * Sorts the specified range of the array using insertion sort. 1313 * 1314 * @param a the array to be sorted 1315 * @param low the index of the first element, inclusive, to be sorted 1316 * @param high the index of the last element, exclusive, to be sorted 1317 */ insertionSort(long[] a, int low, int high)1318 private static void insertionSort(long[] a, int low, int high) { 1319 for (int i, k = low; ++k < high; ) { 1320 long ai = a[i = k]; 1321 1322 if (ai < a[i - 1]) { 1323 while (--i >= low && ai < a[i]) { 1324 a[i + 1] = a[i]; 1325 } 1326 a[i + 1] = ai; 1327 } 1328 } 1329 } 1330 1331 /** 1332 * Sorts the specified range of the array using heap sort. 1333 * 1334 * @param a the array to be sorted 1335 * @param low the index of the first element, inclusive, to be sorted 1336 * @param high the index of the last element, exclusive, to be sorted 1337 */ heapSort(long[] a, int low, int high)1338 private static void heapSort(long[] a, int low, int high) { 1339 for (int k = (low + high) >>> 1; k > low; ) { 1340 pushDown(a, --k, a[k], low, high); 1341 } 1342 while (--high > low) { 1343 long max = a[low]; 1344 pushDown(a, low, a[high], low, high); 1345 a[high] = max; 1346 } 1347 } 1348 1349 /** 1350 * Pushes specified element down during heap sort. 1351 * 1352 * @param a the given array 1353 * @param p the start index 1354 * @param value the given element 1355 * @param low the index of the first element, inclusive, to be sorted 1356 * @param high the index of the last element, exclusive, to be sorted 1357 */ pushDown(long[] a, int p, long value, int low, int high)1358 private static void pushDown(long[] a, int p, long value, int low, int high) { 1359 for (int k ;; a[p] = a[p = k]) { 1360 k = (p << 1) - low + 2; // Index of the right child 1361 1362 if (k > high) { 1363 break; 1364 } 1365 if (k == high || a[k] < a[k - 1]) { 1366 --k; 1367 } 1368 if (a[k] <= value) { 1369 break; 1370 } 1371 } 1372 a[p] = value; 1373 } 1374 1375 /** 1376 * Tries to sort the specified range of the array. 1377 * 1378 * @param sorter parallel context 1379 * @param a the array to be sorted 1380 * @param low the index of the first element to be sorted 1381 * @param size the array size 1382 * @return true if finally sorted, false otherwise 1383 */ tryMergeRuns(Sorter sorter, long[] a, int low, int size)1384 private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) { 1385 1386 /* 1387 * The run array is constructed only if initial runs are 1388 * long enough to continue, run[i] then holds start index 1389 * of the i-th sequence of elements in non-descending order. 1390 */ 1391 int[] run = null; 1392 int high = low + size; 1393 int count = 1, last = low; 1394 1395 /* 1396 * Identify all possible runs. 1397 */ 1398 for (int k = low + 1; k < high; ) { 1399 1400 /* 1401 * Find the end index of the current run. 1402 */ 1403 if (a[k - 1] < a[k]) { 1404 1405 // Identify ascending sequence 1406 while (++k < high && a[k - 1] <= a[k]); 1407 1408 } else if (a[k - 1] > a[k]) { 1409 1410 // Identify descending sequence 1411 while (++k < high && a[k - 1] >= a[k]); 1412 1413 // Reverse into ascending order 1414 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 1415 long ai = a[i]; a[i] = a[j]; a[j] = ai; 1416 } 1417 } else { // Identify constant sequence 1418 for (long ak = a[k]; ++k < high && ak == a[k]; ); 1419 1420 if (k < high) { 1421 continue; 1422 } 1423 } 1424 1425 /* 1426 * Check special cases. 1427 */ 1428 if (run == null) { 1429 if (k == high) { 1430 1431 /* 1432 * The array is monotonous sequence, 1433 * and therefore already sorted. 1434 */ 1435 return true; 1436 } 1437 1438 if (k - low < MIN_FIRST_RUN_SIZE) { 1439 1440 /* 1441 * The first run is too small 1442 * to proceed with scanning. 1443 */ 1444 return false; 1445 } 1446 1447 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 1448 run[0] = low; 1449 1450 } else if (a[last - 1] > a[last]) { 1451 1452 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 1453 1454 /* 1455 * The first runs are not long 1456 * enough to continue scanning. 1457 */ 1458 return false; 1459 } 1460 1461 if (++count == MAX_RUN_CAPACITY) { 1462 1463 /* 1464 * Array is not highly structured. 1465 */ 1466 return false; 1467 } 1468 1469 if (count == run.length) { 1470 1471 /* 1472 * Increase capacity of index array. 1473 */ 1474 run = Arrays.copyOf(run, count << 1); 1475 } 1476 } 1477 run[count] = (last = k); 1478 } 1479 1480 /* 1481 * Merge runs of highly structured array. 1482 */ 1483 if (count > 1) { 1484 long[] b; int offset = low; 1485 1486 if (sorter == null || (b = (long[]) sorter.b) == null) { 1487 b = new long[size]; 1488 } else { 1489 offset = sorter.offset; 1490 } 1491 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 1492 } 1493 return true; 1494 } 1495 1496 /** 1497 * Merges the specified runs. 1498 * 1499 * @param a the source array 1500 * @param b the temporary buffer used in merging 1501 * @param offset the start index in the source, inclusive 1502 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 1503 * @param parallel indicates whether merging is performed in parallel 1504 * @param run the start indexes of the runs, inclusive 1505 * @param lo the start index of the first run, inclusive 1506 * @param hi the start index of the last run, inclusive 1507 * @return the destination where runs are merged 1508 */ mergeRuns(long[] a, long[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)1509 private static long[] mergeRuns(long[] a, long[] b, int offset, 1510 int aim, boolean parallel, int[] run, int lo, int hi) { 1511 1512 if (hi - lo == 1) { 1513 if (aim >= 0) { 1514 return a; 1515 } 1516 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 1517 b[--j] = a[--i] 1518 ); 1519 return b; 1520 } 1521 1522 /* 1523 * Split into approximately equal parts. 1524 */ 1525 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 1526 while (run[++mi + 1] <= rmi); 1527 1528 /* 1529 * Merge the left and right parts. 1530 */ 1531 long[] a1, a2; 1532 1533 if (parallel && hi - lo > MIN_RUN_COUNT) { 1534 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 1535 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 1536 a2 = (long[]) merger.getDestination(); 1537 } else { 1538 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 1539 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 1540 } 1541 1542 long[] dst = a1 == a ? b : a; 1543 1544 int k = a1 == a ? run[lo] - offset : run[lo]; 1545 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 1546 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 1547 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 1548 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 1549 1550 if (parallel) { 1551 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 1552 } else { 1553 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 1554 } 1555 return dst; 1556 } 1557 1558 /** 1559 * Merges the sorted parts. 1560 * 1561 * @param merger parallel context 1562 * @param dst the destination where parts are merged 1563 * @param k the start index of the destination, inclusive 1564 * @param a1 the first part 1565 * @param lo1 the start index of the first part, inclusive 1566 * @param hi1 the end index of the first part, exclusive 1567 * @param a2 the second part 1568 * @param lo2 the start index of the second part, inclusive 1569 * @param hi2 the end index of the second part, exclusive 1570 */ mergeParts(Merger merger, long[] dst, int k, long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2)1571 private static void mergeParts(Merger merger, long[] dst, int k, 1572 long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) { 1573 1574 if (merger != null && a1 == a2) { 1575 1576 while (true) { 1577 1578 /* 1579 * The first part must be larger. 1580 */ 1581 if (hi1 - lo1 < hi2 - lo2) { 1582 int lo = lo1; lo1 = lo2; lo2 = lo; 1583 int hi = hi1; hi1 = hi2; hi2 = hi; 1584 } 1585 1586 /* 1587 * Small parts will be merged sequentially. 1588 */ 1589 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 1590 break; 1591 } 1592 1593 /* 1594 * Find the median of the larger part. 1595 */ 1596 int mi1 = (lo1 + hi1) >>> 1; 1597 long key = a1[mi1]; 1598 int mi2 = hi2; 1599 1600 /* 1601 * Partition the smaller part. 1602 */ 1603 for (int loo = lo2; loo < mi2; ) { 1604 int t = (loo + mi2) >>> 1; 1605 1606 if (key > a2[t]) { 1607 loo = t + 1; 1608 } else { 1609 mi2 = t; 1610 } 1611 } 1612 1613 int d = mi2 - lo2 + mi1 - lo1; 1614 1615 /* 1616 * Merge the right sub-parts in parallel. 1617 */ 1618 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 1619 1620 /* 1621 * Process the sub-left parts. 1622 */ 1623 hi1 = mi1; 1624 hi2 = mi2; 1625 } 1626 } 1627 1628 /* 1629 * Merge small parts sequentially. 1630 */ 1631 while (lo1 < hi1 && lo2 < hi2) { 1632 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 1633 } 1634 if (dst != a1 || k < lo1) { 1635 while (lo1 < hi1) { 1636 dst[k++] = a1[lo1++]; 1637 } 1638 } 1639 if (dst != a2 || k < lo2) { 1640 while (lo2 < hi2) { 1641 dst[k++] = a2[lo2++]; 1642 } 1643 } 1644 } 1645 1646 // [byte] 1647 1648 /** 1649 * Sorts the specified range of the array using 1650 * counting sort or insertion sort. 1651 * 1652 * @param a the array to be sorted 1653 * @param low the index of the first element, inclusive, to be sorted 1654 * @param high the index of the last element, exclusive, to be sorted 1655 */ 1656 static void sort(byte[] a, int low, int high) { 1657 if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) { 1658 countingSort(a, low, high); 1659 } else { 1660 insertionSort(a, low, high); 1661 } 1662 } 1663 1664 /** 1665 * Sorts the specified range of the array using insertion sort. 1666 * 1667 * @param a the array to be sorted 1668 * @param low the index of the first element, inclusive, to be sorted 1669 * @param high the index of the last element, exclusive, to be sorted 1670 */ 1671 private static void insertionSort(byte[] a, int low, int high) { 1672 for (int i, k = low; ++k < high; ) { 1673 byte ai = a[i = k]; 1674 1675 if (ai < a[i - 1]) { 1676 while (--i >= low && ai < a[i]) { 1677 a[i + 1] = a[i]; 1678 } 1679 a[i + 1] = ai; 1680 } 1681 } 1682 } 1683 1684 /** 1685 * The number of distinct byte values. 1686 */ 1687 private static final int NUM_BYTE_VALUES = 1 << 8; 1688 1689 /** 1690 * Max index of byte counter. 1691 */ 1692 private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1; 1693 1694 /** 1695 * Sorts the specified range of the array using counting sort. 1696 * 1697 * @param a the array to be sorted 1698 * @param low the index of the first element, inclusive, to be sorted 1699 * @param high the index of the last element, exclusive, to be sorted 1700 */ 1701 private static void countingSort(byte[] a, int low, int high) { 1702 int[] count = new int[NUM_BYTE_VALUES]; 1703 1704 /* 1705 * Compute a histogram with the number of each values. 1706 */ 1707 for (int i = high; i > low; ++count[a[--i] & 0xFF]); 1708 1709 /* 1710 * Place values on their final positions. 1711 */ 1712 if (high - low > NUM_BYTE_VALUES) { 1713 for (int i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) { 1714 int value = i & 0xFF; 1715 1716 for (low = high - count[value]; high > low; 1717 a[--high] = (byte) value 1718 ); 1719 } 1720 } else { 1721 for (int i = MAX_BYTE_INDEX; high > low; ) { 1722 while (count[--i & 0xFF] == 0); 1723 1724 int value = i & 0xFF; 1725 int c = count[value]; 1726 1727 do { 1728 a[--high] = (byte) value; 1729 } while (--c > 0); 1730 } 1731 } 1732 } 1733 1734 // [char] 1735 1736 /** 1737 * Sorts the specified range of the array using 1738 * counting sort or Dual-Pivot Quicksort. 1739 * 1740 * @param a the array to be sorted 1741 * @param low the index of the first element, inclusive, to be sorted 1742 * @param high the index of the last element, exclusive, to be sorted 1743 */ 1744 static void sort(char[] a, int low, int high) { 1745 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { 1746 countingSort(a, low, high); 1747 } else { 1748 sort(a, 0, low, high); 1749 } 1750 } 1751 1752 /** 1753 * Sorts the specified array using the Dual-Pivot Quicksort and/or 1754 * other sorts in special-cases, possibly with parallel partitions. 1755 * 1756 * @param a the array to be sorted 1757 * @param bits the combination of recursion depth and bit flag, where 1758 * the right bit "0" indicates that array is the leftmost part 1759 * @param low the index of the first element, inclusive, to be sorted 1760 * @param high the index of the last element, exclusive, to be sorted 1761 */ 1762 static void sort(char[] a, int bits, int low, int high) { 1763 while (true) { 1764 int end = high - 1, size = high - low; 1765 1766 /* 1767 * Invoke insertion sort on small leftmost part. 1768 */ 1769 if (size < MAX_INSERTION_SORT_SIZE) { 1770 insertionSort(a, low, high); 1771 return; 1772 } 1773 1774 /* 1775 * Switch to counting sort if execution 1776 * time is becoming quadratic. 1777 */ 1778 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { 1779 countingSort(a, low, high); 1780 return; 1781 } 1782 1783 /* 1784 * Use an inexpensive approximation of the golden ratio 1785 * to select five sample elements and determine pivots. 1786 */ 1787 int step = (size >> 3) * 3 + 3; 1788 1789 /* 1790 * Five elements around (and including) the central element 1791 * will be used for pivot selection as described below. The 1792 * unequal choice of spacing these elements was empirically 1793 * determined to work well on a wide variety of inputs. 1794 */ 1795 int e1 = low + step; 1796 int e5 = end - step; 1797 int e3 = (e1 + e5) >>> 1; 1798 int e2 = (e1 + e3) >>> 1; 1799 int e4 = (e3 + e5) >>> 1; 1800 char a3 = a[e3]; 1801 1802 /* 1803 * Sort these elements in place by the combination 1804 * of 4-element sorting network and insertion sort. 1805 * 1806 * 5 ------o-----------o------------ 1807 * | | 1808 * 4 ------|-----o-----o-----o------ 1809 * | | | 1810 * 2 ------o-----|-----o-----o------ 1811 * | | 1812 * 1 ------------o-----o------------ 1813 */ 1814 if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 1815 if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 1816 if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 1817 if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 1818 if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 1819 1820 if (a3 < a[e2]) { 1821 if (a3 < a[e1]) { 1822 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 1823 } else { 1824 a[e3] = a[e2]; a[e2] = a3; 1825 } 1826 } else if (a3 > a[e4]) { 1827 if (a3 > a[e5]) { 1828 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 1829 } else { 1830 a[e3] = a[e4]; a[e4] = a3; 1831 } 1832 } 1833 1834 // Pointers 1835 int lower = low; // The index of the last element of the left part 1836 int upper = end; // The index of the first element of the right part 1837 1838 /* 1839 * Partitioning with 2 pivots in case of different elements. 1840 */ 1841 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 1842 1843 /* 1844 * Use the first and fifth of the five sorted elements as 1845 * the pivots. These values are inexpensive approximation 1846 * of tertiles. Note, that pivot1 < pivot2. 1847 */ 1848 char pivot1 = a[e1]; 1849 char pivot2 = a[e5]; 1850 1851 /* 1852 * The first and the last elements to be sorted are moved 1853 * to the locations formerly occupied by the pivots. When 1854 * partitioning is completed, the pivots are swapped back 1855 * into their final positions, and excluded from the next 1856 * subsequent sorting. 1857 */ 1858 a[e1] = a[lower]; 1859 a[e5] = a[upper]; 1860 1861 /* 1862 * Skip elements, which are less or greater than the pivots. 1863 */ 1864 while (a[++lower] < pivot1); 1865 while (a[--upper] > pivot2); 1866 1867 /* 1868 * Backward 3-interval partitioning 1869 * 1870 * left part central part right part 1871 * +------------------------------------------------------------+ 1872 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 1873 * +------------------------------------------------------------+ 1874 * ^ ^ ^ 1875 * | | | 1876 * lower k upper 1877 * 1878 * Invariants: 1879 * 1880 * all in (low, lower] < pivot1 1881 * pivot1 <= all in (k, upper) <= pivot2 1882 * all in [upper, end) > pivot2 1883 * 1884 * Pointer k is the last index of ?-part 1885 */ 1886 for (int unused = --lower, k = ++upper; --k > lower; ) { 1887 char ak = a[k]; 1888 1889 if (ak < pivot1) { // Move a[k] to the left side 1890 while (lower < k) { 1891 if (a[++lower] >= pivot1) { 1892 if (a[lower] > pivot2) { 1893 a[k] = a[--upper]; 1894 a[upper] = a[lower]; 1895 } else { 1896 a[k] = a[lower]; 1897 } 1898 a[lower] = ak; 1899 break; 1900 } 1901 } 1902 } else if (ak > pivot2) { // Move a[k] to the right side 1903 a[k] = a[--upper]; 1904 a[upper] = ak; 1905 } 1906 } 1907 1908 /* 1909 * Swap the pivots into their final positions. 1910 */ 1911 a[low] = a[lower]; a[lower] = pivot1; 1912 a[end] = a[upper]; a[upper] = pivot2; 1913 1914 /* 1915 * Sort non-left parts recursively, 1916 * excluding known pivots. 1917 */ 1918 sort(a, bits | 1, lower + 1, upper); 1919 sort(a, bits | 1, upper + 1, high); 1920 1921 } else { // Use single pivot in case of many equal elements 1922 1923 /* 1924 * Use the third of the five sorted elements as the pivot. 1925 * This value is inexpensive approximation of the median. 1926 */ 1927 char pivot = a[e3]; 1928 1929 /* 1930 * The first element to be sorted is moved to the 1931 * location formerly occupied by the pivot. After 1932 * completion of partitioning the pivot is swapped 1933 * back into its final position, and excluded from 1934 * the next subsequent sorting. 1935 */ 1936 a[e3] = a[lower]; 1937 1938 /* 1939 * Traditional 3-way (Dutch National Flag) partitioning 1940 * 1941 * left part central part right part 1942 * +------------------------------------------------------+ 1943 * | < pivot | ? | == pivot | > pivot | 1944 * +------------------------------------------------------+ 1945 * ^ ^ ^ 1946 * | | | 1947 * lower k upper 1948 * 1949 * Invariants: 1950 * 1951 * all in (low, lower] < pivot 1952 * all in (k, upper) == pivot 1953 * all in [upper, end] > pivot 1954 * 1955 * Pointer k is the last index of ?-part 1956 */ 1957 for (int k = ++upper; --k > lower; ) { 1958 char ak = a[k]; 1959 1960 if (ak != pivot) { 1961 a[k] = pivot; 1962 1963 if (ak < pivot) { // Move a[k] to the left side 1964 while (a[++lower] < pivot); 1965 1966 if (a[lower] > pivot) { 1967 a[--upper] = a[lower]; 1968 } 1969 a[lower] = ak; 1970 } else { // ak > pivot - Move a[k] to the right side 1971 a[--upper] = ak; 1972 } 1973 } 1974 } 1975 1976 /* 1977 * Swap the pivot into its final position. 1978 */ 1979 a[low] = a[lower]; a[lower] = pivot; 1980 1981 /* 1982 * Sort the right part, excluding known pivot. 1983 * All elements from the central part are 1984 * equal and therefore already sorted. 1985 */ 1986 sort(a, bits | 1, upper, high); 1987 } 1988 high = lower; // Iterate along the left part 1989 } 1990 } 1991 1992 /** 1993 * Sorts the specified range of the array using insertion sort. 1994 * 1995 * @param a the array to be sorted 1996 * @param low the index of the first element, inclusive, to be sorted 1997 * @param high the index of the last element, exclusive, to be sorted 1998 */ insertionSort(char[] a, int low, int high)1999 private static void insertionSort(char[] a, int low, int high) { 2000 for (int i, k = low; ++k < high; ) { 2001 char ai = a[i = k]; 2002 2003 if (ai < a[i - 1]) { 2004 while (--i >= low && ai < a[i]) { 2005 a[i + 1] = a[i]; 2006 } 2007 a[i + 1] = ai; 2008 } 2009 } 2010 } 2011 2012 /** 2013 * The number of distinct char values. 2014 */ 2015 private static final int NUM_CHAR_VALUES = 1 << 16; 2016 2017 /** 2018 * Sorts the specified range of the array using counting sort. 2019 * 2020 * @param a the array to be sorted 2021 * @param low the index of the first element, inclusive, to be sorted 2022 * @param high the index of the last element, exclusive, to be sorted 2023 */ countingSort(char[] a, int low, int high)2024 private static void countingSort(char[] a, int low, int high) { 2025 int[] count = new int[NUM_CHAR_VALUES]; 2026 2027 /* 2028 * Compute a histogram with the number of each values. 2029 */ 2030 for (int i = high; i > low; ++count[a[--i]]); 2031 2032 /* 2033 * Place values on their final positions. 2034 */ 2035 if (high - low > NUM_CHAR_VALUES) { 2036 for (int i = NUM_CHAR_VALUES; i > 0; ) { 2037 for (low = high - count[--i]; high > low; 2038 a[--high] = (char) i 2039 ); 2040 } 2041 } else { 2042 for (int i = NUM_CHAR_VALUES; high > low; ) { 2043 while (count[--i] == 0); 2044 int c = count[i]; 2045 2046 do { 2047 a[--high] = (char) i; 2048 } while (--c > 0); 2049 } 2050 } 2051 } 2052 2053 // [short] 2054 2055 /** 2056 * Sorts the specified range of the array using 2057 * counting sort or Dual-Pivot Quicksort. 2058 * 2059 * @param a the array to be sorted 2060 * @param low the index of the first element, inclusive, to be sorted 2061 * @param high the index of the last element, exclusive, to be sorted 2062 */ sort(short[] a, int low, int high)2063 static void sort(short[] a, int low, int high) { 2064 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { 2065 countingSort(a, low, high); 2066 } else { 2067 sort(a, 0, low, high); 2068 } 2069 } 2070 2071 /** 2072 * Sorts the specified array using the Dual-Pivot Quicksort and/or 2073 * other sorts in special-cases, possibly with parallel partitions. 2074 * 2075 * @param a the array to be sorted 2076 * @param bits the combination of recursion depth and bit flag, where 2077 * the right bit "0" indicates that array is the leftmost part 2078 * @param low the index of the first element, inclusive, to be sorted 2079 * @param high the index of the last element, exclusive, to be sorted 2080 */ sort(short[] a, int bits, int low, int high)2081 static void sort(short[] a, int bits, int low, int high) { 2082 while (true) { 2083 int end = high - 1, size = high - low; 2084 2085 /* 2086 * Invoke insertion sort on small leftmost part. 2087 */ 2088 if (size < MAX_INSERTION_SORT_SIZE) { 2089 insertionSort(a, low, high); 2090 return; 2091 } 2092 2093 /* 2094 * Switch to counting sort if execution 2095 * time is becoming quadratic. 2096 */ 2097 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { 2098 countingSort(a, low, high); 2099 return; 2100 } 2101 2102 /* 2103 * Use an inexpensive approximation of the golden ratio 2104 * to select five sample elements and determine pivots. 2105 */ 2106 int step = (size >> 3) * 3 + 3; 2107 2108 /* 2109 * Five elements around (and including) the central element 2110 * will be used for pivot selection as described below. The 2111 * unequal choice of spacing these elements was empirically 2112 * determined to work well on a wide variety of inputs. 2113 */ 2114 int e1 = low + step; 2115 int e5 = end - step; 2116 int e3 = (e1 + e5) >>> 1; 2117 int e2 = (e1 + e3) >>> 1; 2118 int e4 = (e3 + e5) >>> 1; 2119 short a3 = a[e3]; 2120 2121 /* 2122 * Sort these elements in place by the combination 2123 * of 4-element sorting network and insertion sort. 2124 * 2125 * 5 ------o-----------o------------ 2126 * | | 2127 * 4 ------|-----o-----o-----o------ 2128 * | | | 2129 * 2 ------o-----|-----o-----o------ 2130 * | | 2131 * 1 ------------o-----o------------ 2132 */ 2133 if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 2134 if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 2135 if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 2136 if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 2137 if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 2138 2139 if (a3 < a[e2]) { 2140 if (a3 < a[e1]) { 2141 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 2142 } else { 2143 a[e3] = a[e2]; a[e2] = a3; 2144 } 2145 } else if (a3 > a[e4]) { 2146 if (a3 > a[e5]) { 2147 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 2148 } else { 2149 a[e3] = a[e4]; a[e4] = a3; 2150 } 2151 } 2152 2153 // Pointers 2154 int lower = low; // The index of the last element of the left part 2155 int upper = end; // The index of the first element of the right part 2156 2157 /* 2158 * Partitioning with 2 pivots in case of different elements. 2159 */ 2160 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 2161 2162 /* 2163 * Use the first and fifth of the five sorted elements as 2164 * the pivots. These values are inexpensive approximation 2165 * of tertiles. Note, that pivot1 < pivot2. 2166 */ 2167 short pivot1 = a[e1]; 2168 short pivot2 = a[e5]; 2169 2170 /* 2171 * The first and the last elements to be sorted are moved 2172 * to the locations formerly occupied by the pivots. When 2173 * partitioning is completed, the pivots are swapped back 2174 * into their final positions, and excluded from the next 2175 * subsequent sorting. 2176 */ 2177 a[e1] = a[lower]; 2178 a[e5] = a[upper]; 2179 2180 /* 2181 * Skip elements, which are less or greater than the pivots. 2182 */ 2183 while (a[++lower] < pivot1); 2184 while (a[--upper] > pivot2); 2185 2186 /* 2187 * Backward 3-interval partitioning 2188 * 2189 * left part central part right part 2190 * +------------------------------------------------------------+ 2191 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 2192 * +------------------------------------------------------------+ 2193 * ^ ^ ^ 2194 * | | | 2195 * lower k upper 2196 * 2197 * Invariants: 2198 * 2199 * all in (low, lower] < pivot1 2200 * pivot1 <= all in (k, upper) <= pivot2 2201 * all in [upper, end) > pivot2 2202 * 2203 * Pointer k is the last index of ?-part 2204 */ 2205 for (int unused = --lower, k = ++upper; --k > lower; ) { 2206 short ak = a[k]; 2207 2208 if (ak < pivot1) { // Move a[k] to the left side 2209 while (lower < k) { 2210 if (a[++lower] >= pivot1) { 2211 if (a[lower] > pivot2) { 2212 a[k] = a[--upper]; 2213 a[upper] = a[lower]; 2214 } else { 2215 a[k] = a[lower]; 2216 } 2217 a[lower] = ak; 2218 break; 2219 } 2220 } 2221 } else if (ak > pivot2) { // Move a[k] to the right side 2222 a[k] = a[--upper]; 2223 a[upper] = ak; 2224 } 2225 } 2226 2227 /* 2228 * Swap the pivots into their final positions. 2229 */ 2230 a[low] = a[lower]; a[lower] = pivot1; 2231 a[end] = a[upper]; a[upper] = pivot2; 2232 2233 /* 2234 * Sort non-left parts recursively, 2235 * excluding known pivots. 2236 */ 2237 sort(a, bits | 1, lower + 1, upper); 2238 sort(a, bits | 1, upper + 1, high); 2239 2240 } else { // Use single pivot in case of many equal elements 2241 2242 /* 2243 * Use the third of the five sorted elements as the pivot. 2244 * This value is inexpensive approximation of the median. 2245 */ 2246 short pivot = a[e3]; 2247 2248 /* 2249 * The first element to be sorted is moved to the 2250 * location formerly occupied by the pivot. After 2251 * completion of partitioning the pivot is swapped 2252 * back into its final position, and excluded from 2253 * the next subsequent sorting. 2254 */ 2255 a[e3] = a[lower]; 2256 2257 /* 2258 * Traditional 3-way (Dutch National Flag) partitioning 2259 * 2260 * left part central part right part 2261 * +------------------------------------------------------+ 2262 * | < pivot | ? | == pivot | > pivot | 2263 * +------------------------------------------------------+ 2264 * ^ ^ ^ 2265 * | | | 2266 * lower k upper 2267 * 2268 * Invariants: 2269 * 2270 * all in (low, lower] < pivot 2271 * all in (k, upper) == pivot 2272 * all in [upper, end] > pivot 2273 * 2274 * Pointer k is the last index of ?-part 2275 */ 2276 for (int k = ++upper; --k > lower; ) { 2277 short ak = a[k]; 2278 2279 if (ak != pivot) { 2280 a[k] = pivot; 2281 2282 if (ak < pivot) { // Move a[k] to the left side 2283 while (a[++lower] < pivot); 2284 2285 if (a[lower] > pivot) { 2286 a[--upper] = a[lower]; 2287 } 2288 a[lower] = ak; 2289 } else { // ak > pivot - Move a[k] to the right side 2290 a[--upper] = ak; 2291 } 2292 } 2293 } 2294 2295 /* 2296 * Swap the pivot into its final position. 2297 */ 2298 a[low] = a[lower]; a[lower] = pivot; 2299 2300 /* 2301 * Sort the right part, excluding known pivot. 2302 * All elements from the central part are 2303 * equal and therefore already sorted. 2304 */ 2305 sort(a, bits | 1, upper, high); 2306 } 2307 high = lower; // Iterate along the left part 2308 } 2309 } 2310 2311 /** 2312 * Sorts the specified range of the array using insertion sort. 2313 * 2314 * @param a the array to be sorted 2315 * @param low the index of the first element, inclusive, to be sorted 2316 * @param high the index of the last element, exclusive, to be sorted 2317 */ insertionSort(short[] a, int low, int high)2318 private static void insertionSort(short[] a, int low, int high) { 2319 for (int i, k = low; ++k < high; ) { 2320 short ai = a[i = k]; 2321 2322 if (ai < a[i - 1]) { 2323 while (--i >= low && ai < a[i]) { 2324 a[i + 1] = a[i]; 2325 } 2326 a[i + 1] = ai; 2327 } 2328 } 2329 } 2330 2331 /** 2332 * The number of distinct short values. 2333 */ 2334 private static final int NUM_SHORT_VALUES = 1 << 16; 2335 2336 /** 2337 * Max index of short counter. 2338 */ 2339 private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1; 2340 2341 /** 2342 * Sorts the specified range of the array using counting sort. 2343 * 2344 * @param a the array to be sorted 2345 * @param low the index of the first element, inclusive, to be sorted 2346 * @param high the index of the last element, exclusive, to be sorted 2347 */ countingSort(short[] a, int low, int high)2348 private static void countingSort(short[] a, int low, int high) { 2349 int[] count = new int[NUM_SHORT_VALUES]; 2350 2351 /* 2352 * Compute a histogram with the number of each values. 2353 */ 2354 for (int i = high; i > low; ++count[a[--i] & 0xFFFF]); 2355 2356 /* 2357 * Place values on their final positions. 2358 */ 2359 if (high - low > NUM_SHORT_VALUES) { 2360 for (int i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) { 2361 int value = i & 0xFFFF; 2362 2363 for (low = high - count[value]; high > low; 2364 a[--high] = (short) value 2365 ); 2366 } 2367 } else { 2368 for (int i = MAX_SHORT_INDEX; high > low; ) { 2369 while (count[--i & 0xFFFF] == 0); 2370 2371 int value = i & 0xFFFF; 2372 int c = count[value]; 2373 2374 do { 2375 a[--high] = (short) value; 2376 } while (--c > 0); 2377 } 2378 } 2379 } 2380 2381 // [float] 2382 2383 /** 2384 * Sorts the specified range of the array using parallel merge 2385 * sort and/or Dual-Pivot Quicksort. 2386 * 2387 * To balance the faster splitting and parallelism of merge sort 2388 * with the faster element partitioning of Quicksort, ranges are 2389 * subdivided in tiers such that, if there is enough parallelism, 2390 * the four-way parallel merge is started, still ensuring enough 2391 * parallelism to process the partitions. 2392 * 2393 * @param a the array to be sorted 2394 * @param parallelism the parallelism level 2395 * @param low the index of the first element, inclusive, to be sorted 2396 * @param high the index of the last element, exclusive, to be sorted 2397 */ sort(float[] a, int parallelism, int low, int high)2398 static void sort(float[] a, int parallelism, int low, int high) { 2399 /* 2400 * Phase 1. Count the number of negative zero -0.0f, 2401 * turn them into positive zero, and move all NaNs 2402 * to the end of the array. 2403 */ 2404 int numNegativeZero = 0; 2405 2406 for (int k = high; k > low; ) { 2407 float ak = a[--k]; 2408 2409 if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f 2410 numNegativeZero += 1; 2411 a[k] = 0.0f; 2412 } else if (ak != ak) { // ak is NaN 2413 a[k] = a[--high]; 2414 a[high] = ak; 2415 } 2416 } 2417 2418 /* 2419 * Phase 2. Sort everything except NaNs, 2420 * which are already in place. 2421 */ 2422 int size = high - low; 2423 2424 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 2425 int depth = getDepth(parallelism, size >> 12); 2426 float[] b = depth == 0 ? null : new float[size]; 2427 new Sorter(null, a, b, low, size, low, depth).invoke(); 2428 } else { 2429 sort(null, a, 0, low, high); 2430 } 2431 2432 /* 2433 * Phase 3. Turn positive zero 0.0f 2434 * back into negative zero -0.0f. 2435 */ 2436 if (++numNegativeZero == 1) { 2437 return; 2438 } 2439 2440 /* 2441 * Find the position one less than 2442 * the index of the first zero. 2443 */ 2444 while (low <= high) { 2445 int middle = (low + high) >>> 1; 2446 2447 if (a[middle] < 0) { 2448 low = middle + 1; 2449 } else { 2450 high = middle - 1; 2451 } 2452 } 2453 2454 /* 2455 * Replace the required number of 0.0f by -0.0f. 2456 */ 2457 while (--numNegativeZero > 0) { 2458 a[++high] = -0.0f; 2459 } 2460 } 2461 2462 /** 2463 * Sorts the specified array using the Dual-Pivot Quicksort and/or 2464 * other sorts in special-cases, possibly with parallel partitions. 2465 * 2466 * @param sorter parallel context 2467 * @param a the array to be sorted 2468 * @param bits the combination of recursion depth and bit flag, where 2469 * the right bit "0" indicates that array is the leftmost part 2470 * @param low the index of the first element, inclusive, to be sorted 2471 * @param high the index of the last element, exclusive, to be sorted 2472 */ sort(Sorter sorter, float[] a, int bits, int low, int high)2473 static void sort(Sorter sorter, float[] a, int bits, int low, int high) { 2474 while (true) { 2475 int end = high - 1, size = high - low; 2476 2477 /* 2478 * Run mixed insertion sort on small non-leftmost parts. 2479 */ 2480 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { 2481 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 2482 return; 2483 } 2484 2485 /* 2486 * Invoke insertion sort on small leftmost part. 2487 */ 2488 if (size < MAX_INSERTION_SORT_SIZE) { 2489 insertionSort(a, low, high); 2490 return; 2491 } 2492 2493 /* 2494 * Check if the whole array or large non-leftmost 2495 * parts are nearly sorted and then merge runs. 2496 */ 2497 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 2498 && tryMergeRuns(sorter, a, low, size)) { 2499 return; 2500 } 2501 2502 /* 2503 * Switch to heap sort if execution 2504 * time is becoming quadratic. 2505 */ 2506 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { 2507 heapSort(a, low, high); 2508 return; 2509 } 2510 2511 /* 2512 * Use an inexpensive approximation of the golden ratio 2513 * to select five sample elements and determine pivots. 2514 */ 2515 int step = (size >> 3) * 3 + 3; 2516 2517 /* 2518 * Five elements around (and including) the central element 2519 * will be used for pivot selection as described below. The 2520 * unequal choice of spacing these elements was empirically 2521 * determined to work well on a wide variety of inputs. 2522 */ 2523 int e1 = low + step; 2524 int e5 = end - step; 2525 int e3 = (e1 + e5) >>> 1; 2526 int e2 = (e1 + e3) >>> 1; 2527 int e4 = (e3 + e5) >>> 1; 2528 float a3 = a[e3]; 2529 2530 /* 2531 * Sort these elements in place by the combination 2532 * of 4-element sorting network and insertion sort. 2533 * 2534 * 5 ------o-----------o------------ 2535 * | | 2536 * 4 ------|-----o-----o-----o------ 2537 * | | | 2538 * 2 ------o-----|-----o-----o------ 2539 * | | 2540 * 1 ------------o-----o------------ 2541 */ 2542 if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 2543 if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 2544 if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 2545 if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 2546 if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 2547 2548 if (a3 < a[e2]) { 2549 if (a3 < a[e1]) { 2550 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 2551 } else { 2552 a[e3] = a[e2]; a[e2] = a3; 2553 } 2554 } else if (a3 > a[e4]) { 2555 if (a3 > a[e5]) { 2556 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 2557 } else { 2558 a[e3] = a[e4]; a[e4] = a3; 2559 } 2560 } 2561 2562 // Pointers 2563 int lower = low; // The index of the last element of the left part 2564 int upper = end; // The index of the first element of the right part 2565 2566 /* 2567 * Partitioning with 2 pivots in case of different elements. 2568 */ 2569 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 2570 2571 /* 2572 * Use the first and fifth of the five sorted elements as 2573 * the pivots. These values are inexpensive approximation 2574 * of tertiles. Note, that pivot1 < pivot2. 2575 */ 2576 float pivot1 = a[e1]; 2577 float pivot2 = a[e5]; 2578 2579 /* 2580 * The first and the last elements to be sorted are moved 2581 * to the locations formerly occupied by the pivots. When 2582 * partitioning is completed, the pivots are swapped back 2583 * into their final positions, and excluded from the next 2584 * subsequent sorting. 2585 */ 2586 a[e1] = a[lower]; 2587 a[e5] = a[upper]; 2588 2589 /* 2590 * Skip elements, which are less or greater than the pivots. 2591 */ 2592 while (a[++lower] < pivot1); 2593 while (a[--upper] > pivot2); 2594 2595 /* 2596 * Backward 3-interval partitioning 2597 * 2598 * left part central part right part 2599 * +------------------------------------------------------------+ 2600 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 2601 * +------------------------------------------------------------+ 2602 * ^ ^ ^ 2603 * | | | 2604 * lower k upper 2605 * 2606 * Invariants: 2607 * 2608 * all in (low, lower] < pivot1 2609 * pivot1 <= all in (k, upper) <= pivot2 2610 * all in [upper, end) > pivot2 2611 * 2612 * Pointer k is the last index of ?-part 2613 */ 2614 for (int unused = --lower, k = ++upper; --k > lower; ) { 2615 float ak = a[k]; 2616 2617 if (ak < pivot1) { // Move a[k] to the left side 2618 while (lower < k) { 2619 if (a[++lower] >= pivot1) { 2620 if (a[lower] > pivot2) { 2621 a[k] = a[--upper]; 2622 a[upper] = a[lower]; 2623 } else { 2624 a[k] = a[lower]; 2625 } 2626 a[lower] = ak; 2627 break; 2628 } 2629 } 2630 } else if (ak > pivot2) { // Move a[k] to the right side 2631 a[k] = a[--upper]; 2632 a[upper] = ak; 2633 } 2634 } 2635 2636 /* 2637 * Swap the pivots into their final positions. 2638 */ 2639 a[low] = a[lower]; a[lower] = pivot1; 2640 a[end] = a[upper]; a[upper] = pivot2; 2641 2642 /* 2643 * Sort non-left parts recursively (possibly in parallel), 2644 * excluding known pivots. 2645 */ 2646 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 2647 sorter.forkSorter(bits | 1, lower + 1, upper); 2648 sorter.forkSorter(bits | 1, upper + 1, high); 2649 } else { 2650 sort(sorter, a, bits | 1, lower + 1, upper); 2651 sort(sorter, a, bits | 1, upper + 1, high); 2652 } 2653 2654 } else { // Use single pivot in case of many equal elements 2655 2656 /* 2657 * Use the third of the five sorted elements as the pivot. 2658 * This value is inexpensive approximation of the median. 2659 */ 2660 float pivot = a[e3]; 2661 2662 /* 2663 * The first element to be sorted is moved to the 2664 * location formerly occupied by the pivot. After 2665 * completion of partitioning the pivot is swapped 2666 * back into its final position, and excluded from 2667 * the next subsequent sorting. 2668 */ 2669 a[e3] = a[lower]; 2670 2671 /* 2672 * Traditional 3-way (Dutch National Flag) partitioning 2673 * 2674 * left part central part right part 2675 * +------------------------------------------------------+ 2676 * | < pivot | ? | == pivot | > pivot | 2677 * +------------------------------------------------------+ 2678 * ^ ^ ^ 2679 * | | | 2680 * lower k upper 2681 * 2682 * Invariants: 2683 * 2684 * all in (low, lower] < pivot 2685 * all in (k, upper) == pivot 2686 * all in [upper, end] > pivot 2687 * 2688 * Pointer k is the last index of ?-part 2689 */ 2690 for (int k = ++upper; --k > lower; ) { 2691 float ak = a[k]; 2692 2693 if (ak != pivot) { 2694 a[k] = pivot; 2695 2696 if (ak < pivot) { // Move a[k] to the left side 2697 while (a[++lower] < pivot); 2698 2699 if (a[lower] > pivot) { 2700 a[--upper] = a[lower]; 2701 } 2702 a[lower] = ak; 2703 } else { // ak > pivot - Move a[k] to the right side 2704 a[--upper] = ak; 2705 } 2706 } 2707 } 2708 2709 /* 2710 * Swap the pivot into its final position. 2711 */ 2712 a[low] = a[lower]; a[lower] = pivot; 2713 2714 /* 2715 * Sort the right part (possibly in parallel), excluding 2716 * known pivot. All elements from the central part are 2717 * equal and therefore already sorted. 2718 */ 2719 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 2720 sorter.forkSorter(bits | 1, upper, high); 2721 } else { 2722 sort(sorter, a, bits | 1, upper, high); 2723 } 2724 } 2725 high = lower; // Iterate along the left part 2726 } 2727 } 2728 2729 /** 2730 * Sorts the specified range of the array using mixed insertion sort. 2731 * 2732 * Mixed insertion sort is combination of simple insertion sort, 2733 * pin insertion sort and pair insertion sort. 2734 * 2735 * In the context of Dual-Pivot Quicksort, the pivot element 2736 * from the left part plays the role of sentinel, because it 2737 * is less than any elements from the given part. Therefore, 2738 * expensive check of the left range can be skipped on each 2739 * iteration unless it is the leftmost call. 2740 * 2741 * @param a the array to be sorted 2742 * @param low the index of the first element, inclusive, to be sorted 2743 * @param end the index of the last element for simple insertion sort 2744 * @param high the index of the last element, exclusive, to be sorted 2745 */ mixedInsertionSort(float[] a, int low, int end, int high)2746 private static void mixedInsertionSort(float[] a, int low, int end, int high) { 2747 if (end == high) { 2748 2749 /* 2750 * Invoke simple insertion sort on tiny array. 2751 */ 2752 for (int i; ++low < end; ) { 2753 float ai = a[i = low]; 2754 2755 while (ai < a[--i]) { 2756 a[i + 1] = a[i]; 2757 } 2758 a[i + 1] = ai; 2759 } 2760 } else { 2761 2762 /* 2763 * Start with pin insertion sort on small part. 2764 * 2765 * Pin insertion sort is extended simple insertion sort. 2766 * The main idea of this sort is to put elements larger 2767 * than an element called pin to the end of array (the 2768 * proper area for such elements). It avoids expensive 2769 * movements of these elements through the whole array. 2770 */ 2771 float pin = a[end]; 2772 2773 for (int i, p = high; ++low < end; ) { 2774 float ai = a[i = low]; 2775 2776 if (ai < a[i - 1]) { // Small element 2777 2778 /* 2779 * Insert small element into sorted part. 2780 */ 2781 a[i] = a[--i]; 2782 2783 while (ai < a[--i]) { 2784 a[i + 1] = a[i]; 2785 } 2786 a[i + 1] = ai; 2787 2788 } else if (p > i && ai > pin) { // Large element 2789 2790 /* 2791 * Find element smaller than pin. 2792 */ 2793 while (a[--p] > pin); 2794 2795 /* 2796 * Swap it with large element. 2797 */ 2798 if (p > i) { 2799 ai = a[p]; 2800 a[p] = a[i]; 2801 } 2802 2803 /* 2804 * Insert small element into sorted part. 2805 */ 2806 while (ai < a[--i]) { 2807 a[i + 1] = a[i]; 2808 } 2809 a[i + 1] = ai; 2810 } 2811 } 2812 2813 /* 2814 * Continue with pair insertion sort on remain part. 2815 */ 2816 for (int i; low < high; ++low) { 2817 float a1 = a[i = low], a2 = a[++low]; 2818 2819 /* 2820 * Insert two elements per iteration: at first, insert the 2821 * larger element and then insert the smaller element, but 2822 * from the position where the larger element was inserted. 2823 */ 2824 if (a1 > a2) { 2825 2826 while (a1 < a[--i]) { 2827 a[i + 2] = a[i]; 2828 } 2829 a[++i + 1] = a1; 2830 2831 while (a2 < a[--i]) { 2832 a[i + 1] = a[i]; 2833 } 2834 a[i + 1] = a2; 2835 2836 } else if (a1 < a[i - 1]) { 2837 2838 while (a2 < a[--i]) { 2839 a[i + 2] = a[i]; 2840 } 2841 a[++i + 1] = a2; 2842 2843 while (a1 < a[--i]) { 2844 a[i + 1] = a[i]; 2845 } 2846 a[i + 1] = a1; 2847 } 2848 } 2849 } 2850 } 2851 2852 /** 2853 * Sorts the specified range of the array using insertion sort. 2854 * 2855 * @param a the array to be sorted 2856 * @param low the index of the first element, inclusive, to be sorted 2857 * @param high the index of the last element, exclusive, to be sorted 2858 */ insertionSort(float[] a, int low, int high)2859 private static void insertionSort(float[] a, int low, int high) { 2860 for (int i, k = low; ++k < high; ) { 2861 float ai = a[i = k]; 2862 2863 if (ai < a[i - 1]) { 2864 while (--i >= low && ai < a[i]) { 2865 a[i + 1] = a[i]; 2866 } 2867 a[i + 1] = ai; 2868 } 2869 } 2870 } 2871 2872 /** 2873 * Sorts the specified range of the array using heap sort. 2874 * 2875 * @param a the array to be sorted 2876 * @param low the index of the first element, inclusive, to be sorted 2877 * @param high the index of the last element, exclusive, to be sorted 2878 */ heapSort(float[] a, int low, int high)2879 private static void heapSort(float[] a, int low, int high) { 2880 for (int k = (low + high) >>> 1; k > low; ) { 2881 pushDown(a, --k, a[k], low, high); 2882 } 2883 while (--high > low) { 2884 float max = a[low]; 2885 pushDown(a, low, a[high], low, high); 2886 a[high] = max; 2887 } 2888 } 2889 2890 /** 2891 * Pushes specified element down during heap sort. 2892 * 2893 * @param a the given array 2894 * @param p the start index 2895 * @param value the given element 2896 * @param low the index of the first element, inclusive, to be sorted 2897 * @param high the index of the last element, exclusive, to be sorted 2898 */ pushDown(float[] a, int p, float value, int low, int high)2899 private static void pushDown(float[] a, int p, float value, int low, int high) { 2900 for (int k ;; a[p] = a[p = k]) { 2901 k = (p << 1) - low + 2; // Index of the right child 2902 2903 if (k > high) { 2904 break; 2905 } 2906 if (k == high || a[k] < a[k - 1]) { 2907 --k; 2908 } 2909 if (a[k] <= value) { 2910 break; 2911 } 2912 } 2913 a[p] = value; 2914 } 2915 2916 /** 2917 * Tries to sort the specified range of the array. 2918 * 2919 * @param sorter parallel context 2920 * @param a the array to be sorted 2921 * @param low the index of the first element to be sorted 2922 * @param size the array size 2923 * @return true if finally sorted, false otherwise 2924 */ tryMergeRuns(Sorter sorter, float[] a, int low, int size)2925 private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) { 2926 2927 /* 2928 * The run array is constructed only if initial runs are 2929 * long enough to continue, run[i] then holds start index 2930 * of the i-th sequence of elements in non-descending order. 2931 */ 2932 int[] run = null; 2933 int high = low + size; 2934 int count = 1, last = low; 2935 2936 /* 2937 * Identify all possible runs. 2938 */ 2939 for (int k = low + 1; k < high; ) { 2940 2941 /* 2942 * Find the end index of the current run. 2943 */ 2944 if (a[k - 1] < a[k]) { 2945 2946 // Identify ascending sequence 2947 while (++k < high && a[k - 1] <= a[k]); 2948 2949 } else if (a[k - 1] > a[k]) { 2950 2951 // Identify descending sequence 2952 while (++k < high && a[k - 1] >= a[k]); 2953 2954 // Reverse into ascending order 2955 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 2956 float ai = a[i]; a[i] = a[j]; a[j] = ai; 2957 } 2958 } else { // Identify constant sequence 2959 for (float ak = a[k]; ++k < high && ak == a[k]; ); 2960 2961 if (k < high) { 2962 continue; 2963 } 2964 } 2965 2966 /* 2967 * Check special cases. 2968 */ 2969 if (run == null) { 2970 if (k == high) { 2971 2972 /* 2973 * The array is monotonous sequence, 2974 * and therefore already sorted. 2975 */ 2976 return true; 2977 } 2978 2979 if (k - low < MIN_FIRST_RUN_SIZE) { 2980 2981 /* 2982 * The first run is too small 2983 * to proceed with scanning. 2984 */ 2985 return false; 2986 } 2987 2988 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 2989 run[0] = low; 2990 2991 } else if (a[last - 1] > a[last]) { 2992 2993 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 2994 2995 /* 2996 * The first runs are not long 2997 * enough to continue scanning. 2998 */ 2999 return false; 3000 } 3001 3002 if (++count == MAX_RUN_CAPACITY) { 3003 3004 /* 3005 * Array is not highly structured. 3006 */ 3007 return false; 3008 } 3009 3010 if (count == run.length) { 3011 3012 /* 3013 * Increase capacity of index array. 3014 */ 3015 run = Arrays.copyOf(run, count << 1); 3016 } 3017 } 3018 run[count] = (last = k); 3019 } 3020 3021 /* 3022 * Merge runs of highly structured array. 3023 */ 3024 if (count > 1) { 3025 float[] b; int offset = low; 3026 3027 if (sorter == null || (b = (float[]) sorter.b) == null) { 3028 b = new float[size]; 3029 } else { 3030 offset = sorter.offset; 3031 } 3032 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 3033 } 3034 return true; 3035 } 3036 3037 /** 3038 * Merges the specified runs. 3039 * 3040 * @param a the source array 3041 * @param b the temporary buffer used in merging 3042 * @param offset the start index in the source, inclusive 3043 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 3044 * @param parallel indicates whether merging is performed in parallel 3045 * @param run the start indexes of the runs, inclusive 3046 * @param lo the start index of the first run, inclusive 3047 * @param hi the start index of the last run, inclusive 3048 * @return the destination where runs are merged 3049 */ mergeRuns(float[] a, float[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)3050 private static float[] mergeRuns(float[] a, float[] b, int offset, 3051 int aim, boolean parallel, int[] run, int lo, int hi) { 3052 3053 if (hi - lo == 1) { 3054 if (aim >= 0) { 3055 return a; 3056 } 3057 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 3058 b[--j] = a[--i] 3059 ); 3060 return b; 3061 } 3062 3063 /* 3064 * Split into approximately equal parts. 3065 */ 3066 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 3067 while (run[++mi + 1] <= rmi); 3068 3069 /* 3070 * Merge the left and right parts. 3071 */ 3072 float[] a1, a2; 3073 3074 if (parallel && hi - lo > MIN_RUN_COUNT) { 3075 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 3076 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 3077 a2 = (float[]) merger.getDestination(); 3078 } else { 3079 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 3080 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 3081 } 3082 3083 float[] dst = a1 == a ? b : a; 3084 3085 int k = a1 == a ? run[lo] - offset : run[lo]; 3086 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 3087 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 3088 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 3089 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 3090 3091 if (parallel) { 3092 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 3093 } else { 3094 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 3095 } 3096 return dst; 3097 } 3098 3099 /** 3100 * Merges the sorted parts. 3101 * 3102 * @param merger parallel context 3103 * @param dst the destination where parts are merged 3104 * @param k the start index of the destination, inclusive 3105 * @param a1 the first part 3106 * @param lo1 the start index of the first part, inclusive 3107 * @param hi1 the end index of the first part, exclusive 3108 * @param a2 the second part 3109 * @param lo2 the start index of the second part, inclusive 3110 * @param hi2 the end index of the second part, exclusive 3111 */ mergeParts(Merger merger, float[] dst, int k, float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2)3112 private static void mergeParts(Merger merger, float[] dst, int k, 3113 float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) { 3114 3115 if (merger != null && a1 == a2) { 3116 3117 while (true) { 3118 3119 /* 3120 * The first part must be larger. 3121 */ 3122 if (hi1 - lo1 < hi2 - lo2) { 3123 int lo = lo1; lo1 = lo2; lo2 = lo; 3124 int hi = hi1; hi1 = hi2; hi2 = hi; 3125 } 3126 3127 /* 3128 * Small parts will be merged sequentially. 3129 */ 3130 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 3131 break; 3132 } 3133 3134 /* 3135 * Find the median of the larger part. 3136 */ 3137 int mi1 = (lo1 + hi1) >>> 1; 3138 float key = a1[mi1]; 3139 int mi2 = hi2; 3140 3141 /* 3142 * Partition the smaller part. 3143 */ 3144 for (int loo = lo2; loo < mi2; ) { 3145 int t = (loo + mi2) >>> 1; 3146 3147 if (key > a2[t]) { 3148 loo = t + 1; 3149 } else { 3150 mi2 = t; 3151 } 3152 } 3153 3154 int d = mi2 - lo2 + mi1 - lo1; 3155 3156 /* 3157 * Merge the right sub-parts in parallel. 3158 */ 3159 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 3160 3161 /* 3162 * Process the sub-left parts. 3163 */ 3164 hi1 = mi1; 3165 hi2 = mi2; 3166 } 3167 } 3168 3169 /* 3170 * Merge small parts sequentially. 3171 */ 3172 while (lo1 < hi1 && lo2 < hi2) { 3173 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 3174 } 3175 if (dst != a1 || k < lo1) { 3176 while (lo1 < hi1) { 3177 dst[k++] = a1[lo1++]; 3178 } 3179 } 3180 if (dst != a2 || k < lo2) { 3181 while (lo2 < hi2) { 3182 dst[k++] = a2[lo2++]; 3183 } 3184 } 3185 } 3186 3187 // [double] 3188 3189 /** 3190 * Sorts the specified range of the array using parallel merge 3191 * sort and/or Dual-Pivot Quicksort. 3192 * 3193 * To balance the faster splitting and parallelism of merge sort 3194 * with the faster element partitioning of Quicksort, ranges are 3195 * subdivided in tiers such that, if there is enough parallelism, 3196 * the four-way parallel merge is started, still ensuring enough 3197 * parallelism to process the partitions. 3198 * 3199 * @param a the array to be sorted 3200 * @param parallelism the parallelism level 3201 * @param low the index of the first element, inclusive, to be sorted 3202 * @param high the index of the last element, exclusive, to be sorted 3203 */ 3204 static void sort(double[] a, int parallelism, int low, int high) { 3205 /* 3206 * Phase 1. Count the number of negative zero -0.0d, 3207 * turn them into positive zero, and move all NaNs 3208 * to the end of the array. 3209 */ 3210 int numNegativeZero = 0; 3211 3212 for (int k = high; k > low; ) { 3213 double ak = a[--k]; 3214 3215 if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d 3216 numNegativeZero += 1; 3217 a[k] = 0.0d; 3218 } else if (ak != ak) { // ak is NaN 3219 a[k] = a[--high]; 3220 a[high] = ak; 3221 } 3222 } 3223 3224 /* 3225 * Phase 2. Sort everything except NaNs, 3226 * which are already in place. 3227 */ 3228 int size = high - low; 3229 3230 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 3231 int depth = getDepth(parallelism, size >> 12); 3232 double[] b = depth == 0 ? null : new double[size]; 3233 new Sorter(null, a, b, low, size, low, depth).invoke(); 3234 } else { 3235 sort(null, a, 0, low, high); 3236 } 3237 3238 /* 3239 * Phase 3. Turn positive zero 0.0d 3240 * back into negative zero -0.0d. 3241 */ 3242 if (++numNegativeZero == 1) { 3243 return; 3244 } 3245 3246 /* 3247 * Find the position one less than 3248 * the index of the first zero. 3249 */ 3250 while (low <= high) { 3251 int middle = (low + high) >>> 1; 3252 3253 if (a[middle] < 0) { 3254 low = middle + 1; 3255 } else { 3256 high = middle - 1; 3257 } 3258 } 3259 3260 /* 3261 * Replace the required number of 0.0d by -0.0d. 3262 */ 3263 while (--numNegativeZero > 0) { 3264 a[++high] = -0.0d; 3265 } 3266 } 3267 3268 /** 3269 * Sorts the specified array using the Dual-Pivot Quicksort and/or 3270 * other sorts in special-cases, possibly with parallel partitions. 3271 * 3272 * @param sorter parallel context 3273 * @param a the array to be sorted 3274 * @param bits the combination of recursion depth and bit flag, where 3275 * the right bit "0" indicates that array is the leftmost part 3276 * @param low the index of the first element, inclusive, to be sorted 3277 * @param high the index of the last element, exclusive, to be sorted 3278 */ sort(Sorter sorter, double[] a, int bits, int low, int high)3279 static void sort(Sorter sorter, double[] a, int bits, int low, int high) { 3280 while (true) { 3281 int end = high - 1, size = high - low; 3282 3283 /* 3284 * Run mixed insertion sort on small non-leftmost parts. 3285 */ 3286 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { 3287 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 3288 return; 3289 } 3290 3291 /* 3292 * Invoke insertion sort on small leftmost part. 3293 */ 3294 if (size < MAX_INSERTION_SORT_SIZE) { 3295 insertionSort(a, low, high); 3296 return; 3297 } 3298 3299 /* 3300 * Check if the whole array or large non-leftmost 3301 * parts are nearly sorted and then merge runs. 3302 */ 3303 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 3304 && tryMergeRuns(sorter, a, low, size)) { 3305 return; 3306 } 3307 3308 /* 3309 * Switch to heap sort if execution 3310 * time is becoming quadratic. 3311 */ 3312 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { 3313 heapSort(a, low, high); 3314 return; 3315 } 3316 3317 /* 3318 * Use an inexpensive approximation of the golden ratio 3319 * to select five sample elements and determine pivots. 3320 */ 3321 int step = (size >> 3) * 3 + 3; 3322 3323 /* 3324 * Five elements around (and including) the central element 3325 * will be used for pivot selection as described below. The 3326 * unequal choice of spacing these elements was empirically 3327 * determined to work well on a wide variety of inputs. 3328 */ 3329 int e1 = low + step; 3330 int e5 = end - step; 3331 int e3 = (e1 + e5) >>> 1; 3332 int e2 = (e1 + e3) >>> 1; 3333 int e4 = (e3 + e5) >>> 1; 3334 double a3 = a[e3]; 3335 3336 /* 3337 * Sort these elements in place by the combination 3338 * of 4-element sorting network and insertion sort. 3339 * 3340 * 5 ------o-----------o------------ 3341 * | | 3342 * 4 ------|-----o-----o-----o------ 3343 * | | | 3344 * 2 ------o-----|-----o-----o------ 3345 * | | 3346 * 1 ------------o-----o------------ 3347 */ 3348 if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 3349 if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 3350 if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 3351 if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 3352 if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 3353 3354 if (a3 < a[e2]) { 3355 if (a3 < a[e1]) { 3356 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 3357 } else { 3358 a[e3] = a[e2]; a[e2] = a3; 3359 } 3360 } else if (a3 > a[e4]) { 3361 if (a3 > a[e5]) { 3362 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 3363 } else { 3364 a[e3] = a[e4]; a[e4] = a3; 3365 } 3366 } 3367 3368 // Pointers 3369 int lower = low; // The index of the last element of the left part 3370 int upper = end; // The index of the first element of the right part 3371 3372 /* 3373 * Partitioning with 2 pivots in case of different elements. 3374 */ 3375 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 3376 3377 /* 3378 * Use the first and fifth of the five sorted elements as 3379 * the pivots. These values are inexpensive approximation 3380 * of tertiles. Note, that pivot1 < pivot2. 3381 */ 3382 double pivot1 = a[e1]; 3383 double pivot2 = a[e5]; 3384 3385 /* 3386 * The first and the last elements to be sorted are moved 3387 * to the locations formerly occupied by the pivots. When 3388 * partitioning is completed, the pivots are swapped back 3389 * into their final positions, and excluded from the next 3390 * subsequent sorting. 3391 */ 3392 a[e1] = a[lower]; 3393 a[e5] = a[upper]; 3394 3395 /* 3396 * Skip elements, which are less or greater than the pivots. 3397 */ 3398 while (a[++lower] < pivot1); 3399 while (a[--upper] > pivot2); 3400 3401 /* 3402 * Backward 3-interval partitioning 3403 * 3404 * left part central part right part 3405 * +------------------------------------------------------------+ 3406 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 3407 * +------------------------------------------------------------+ 3408 * ^ ^ ^ 3409 * | | | 3410 * lower k upper 3411 * 3412 * Invariants: 3413 * 3414 * all in (low, lower] < pivot1 3415 * pivot1 <= all in (k, upper) <= pivot2 3416 * all in [upper, end) > pivot2 3417 * 3418 * Pointer k is the last index of ?-part 3419 */ 3420 for (int unused = --lower, k = ++upper; --k > lower; ) { 3421 double ak = a[k]; 3422 3423 if (ak < pivot1) { // Move a[k] to the left side 3424 while (lower < k) { 3425 if (a[++lower] >= pivot1) { 3426 if (a[lower] > pivot2) { 3427 a[k] = a[--upper]; 3428 a[upper] = a[lower]; 3429 } else { 3430 a[k] = a[lower]; 3431 } 3432 a[lower] = ak; 3433 break; 3434 } 3435 } 3436 } else if (ak > pivot2) { // Move a[k] to the right side 3437 a[k] = a[--upper]; 3438 a[upper] = ak; 3439 } 3440 } 3441 3442 /* 3443 * Swap the pivots into their final positions. 3444 */ 3445 a[low] = a[lower]; a[lower] = pivot1; 3446 a[end] = a[upper]; a[upper] = pivot2; 3447 3448 /* 3449 * Sort non-left parts recursively (possibly in parallel), 3450 * excluding known pivots. 3451 */ 3452 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 3453 sorter.forkSorter(bits | 1, lower + 1, upper); 3454 sorter.forkSorter(bits | 1, upper + 1, high); 3455 } else { 3456 sort(sorter, a, bits | 1, lower + 1, upper); 3457 sort(sorter, a, bits | 1, upper + 1, high); 3458 } 3459 3460 } else { // Use single pivot in case of many equal elements 3461 3462 /* 3463 * Use the third of the five sorted elements as the pivot. 3464 * This value is inexpensive approximation of the median. 3465 */ 3466 double pivot = a[e3]; 3467 3468 /* 3469 * The first element to be sorted is moved to the 3470 * location formerly occupied by the pivot. After 3471 * completion of partitioning the pivot is swapped 3472 * back into its final position, and excluded from 3473 * the next subsequent sorting. 3474 */ 3475 a[e3] = a[lower]; 3476 3477 /* 3478 * Traditional 3-way (Dutch National Flag) partitioning 3479 * 3480 * left part central part right part 3481 * +------------------------------------------------------+ 3482 * | < pivot | ? | == pivot | > pivot | 3483 * +------------------------------------------------------+ 3484 * ^ ^ ^ 3485 * | | | 3486 * lower k upper 3487 * 3488 * Invariants: 3489 * 3490 * all in (low, lower] < pivot 3491 * all in (k, upper) == pivot 3492 * all in [upper, end] > pivot 3493 * 3494 * Pointer k is the last index of ?-part 3495 */ 3496 for (int k = ++upper; --k > lower; ) { 3497 double ak = a[k]; 3498 3499 if (ak != pivot) { 3500 a[k] = pivot; 3501 3502 if (ak < pivot) { // Move a[k] to the left side 3503 while (a[++lower] < pivot); 3504 3505 if (a[lower] > pivot) { 3506 a[--upper] = a[lower]; 3507 } 3508 a[lower] = ak; 3509 } else { // ak > pivot - Move a[k] to the right side 3510 a[--upper] = ak; 3511 } 3512 } 3513 } 3514 3515 /* 3516 * Swap the pivot into its final position. 3517 */ 3518 a[low] = a[lower]; a[lower] = pivot; 3519 3520 /* 3521 * Sort the right part (possibly in parallel), excluding 3522 * known pivot. All elements from the central part are 3523 * equal and therefore already sorted. 3524 */ 3525 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 3526 sorter.forkSorter(bits | 1, upper, high); 3527 } else { 3528 sort(sorter, a, bits | 1, upper, high); 3529 } 3530 } 3531 high = lower; // Iterate along the left part 3532 } 3533 } 3534 3535 /** 3536 * Sorts the specified range of the array using mixed insertion sort. 3537 * 3538 * Mixed insertion sort is combination of simple insertion sort, 3539 * pin insertion sort and pair insertion sort. 3540 * 3541 * In the context of Dual-Pivot Quicksort, the pivot element 3542 * from the left part plays the role of sentinel, because it 3543 * is less than any elements from the given part. Therefore, 3544 * expensive check of the left range can be skipped on each 3545 * iteration unless it is the leftmost call. 3546 * 3547 * @param a the array to be sorted 3548 * @param low the index of the first element, inclusive, to be sorted 3549 * @param end the index of the last element for simple insertion sort 3550 * @param high the index of the last element, exclusive, to be sorted 3551 */ mixedInsertionSort(double[] a, int low, int end, int high)3552 private static void mixedInsertionSort(double[] a, int low, int end, int high) { 3553 if (end == high) { 3554 3555 /* 3556 * Invoke simple insertion sort on tiny array. 3557 */ 3558 for (int i; ++low < end; ) { 3559 double ai = a[i = low]; 3560 3561 while (ai < a[--i]) { 3562 a[i + 1] = a[i]; 3563 } 3564 a[i + 1] = ai; 3565 } 3566 } else { 3567 3568 /* 3569 * Start with pin insertion sort on small part. 3570 * 3571 * Pin insertion sort is extended simple insertion sort. 3572 * The main idea of this sort is to put elements larger 3573 * than an element called pin to the end of array (the 3574 * proper area for such elements). It avoids expensive 3575 * movements of these elements through the whole array. 3576 */ 3577 double pin = a[end]; 3578 3579 for (int i, p = high; ++low < end; ) { 3580 double ai = a[i = low]; 3581 3582 if (ai < a[i - 1]) { // Small element 3583 3584 /* 3585 * Insert small element into sorted part. 3586 */ 3587 a[i] = a[--i]; 3588 3589 while (ai < a[--i]) { 3590 a[i + 1] = a[i]; 3591 } 3592 a[i + 1] = ai; 3593 3594 } else if (p > i && ai > pin) { // Large element 3595 3596 /* 3597 * Find element smaller than pin. 3598 */ 3599 while (a[--p] > pin); 3600 3601 /* 3602 * Swap it with large element. 3603 */ 3604 if (p > i) { 3605 ai = a[p]; 3606 a[p] = a[i]; 3607 } 3608 3609 /* 3610 * Insert small element into sorted part. 3611 */ 3612 while (ai < a[--i]) { 3613 a[i + 1] = a[i]; 3614 } 3615 a[i + 1] = ai; 3616 } 3617 } 3618 3619 /* 3620 * Continue with pair insertion sort on remain part. 3621 */ 3622 for (int i; low < high; ++low) { 3623 double a1 = a[i = low], a2 = a[++low]; 3624 3625 /* 3626 * Insert two elements per iteration: at first, insert the 3627 * larger element and then insert the smaller element, but 3628 * from the position where the larger element was inserted. 3629 */ 3630 if (a1 > a2) { 3631 3632 while (a1 < a[--i]) { 3633 a[i + 2] = a[i]; 3634 } 3635 a[++i + 1] = a1; 3636 3637 while (a2 < a[--i]) { 3638 a[i + 1] = a[i]; 3639 } 3640 a[i + 1] = a2; 3641 3642 } else if (a1 < a[i - 1]) { 3643 3644 while (a2 < a[--i]) { 3645 a[i + 2] = a[i]; 3646 } 3647 a[++i + 1] = a2; 3648 3649 while (a1 < a[--i]) { 3650 a[i + 1] = a[i]; 3651 } 3652 a[i + 1] = a1; 3653 } 3654 } 3655 } 3656 } 3657 3658 /** 3659 * Sorts the specified range of the array using insertion sort. 3660 * 3661 * @param a the array to be sorted 3662 * @param low the index of the first element, inclusive, to be sorted 3663 * @param high the index of the last element, exclusive, to be sorted 3664 */ insertionSort(double[] a, int low, int high)3665 private static void insertionSort(double[] a, int low, int high) { 3666 for (int i, k = low; ++k < high; ) { 3667 double ai = a[i = k]; 3668 3669 if (ai < a[i - 1]) { 3670 while (--i >= low && ai < a[i]) { 3671 a[i + 1] = a[i]; 3672 } 3673 a[i + 1] = ai; 3674 } 3675 } 3676 } 3677 3678 /** 3679 * Sorts the specified range of the array using heap sort. 3680 * 3681 * @param a the array to be sorted 3682 * @param low the index of the first element, inclusive, to be sorted 3683 * @param high the index of the last element, exclusive, to be sorted 3684 */ heapSort(double[] a, int low, int high)3685 private static void heapSort(double[] a, int low, int high) { 3686 for (int k = (low + high) >>> 1; k > low; ) { 3687 pushDown(a, --k, a[k], low, high); 3688 } 3689 while (--high > low) { 3690 double max = a[low]; 3691 pushDown(a, low, a[high], low, high); 3692 a[high] = max; 3693 } 3694 } 3695 3696 /** 3697 * Pushes specified element down during heap sort. 3698 * 3699 * @param a the given array 3700 * @param p the start index 3701 * @param value the given element 3702 * @param low the index of the first element, inclusive, to be sorted 3703 * @param high the index of the last element, exclusive, to be sorted 3704 */ pushDown(double[] a, int p, double value, int low, int high)3705 private static void pushDown(double[] a, int p, double value, int low, int high) { 3706 for (int k ;; a[p] = a[p = k]) { 3707 k = (p << 1) - low + 2; // Index of the right child 3708 3709 if (k > high) { 3710 break; 3711 } 3712 if (k == high || a[k] < a[k - 1]) { 3713 --k; 3714 } 3715 if (a[k] <= value) { 3716 break; 3717 } 3718 } 3719 a[p] = value; 3720 } 3721 3722 /** 3723 * Tries to sort the specified range of the array. 3724 * 3725 * @param sorter parallel context 3726 * @param a the array to be sorted 3727 * @param low the index of the first element to be sorted 3728 * @param size the array size 3729 * @return true if finally sorted, false otherwise 3730 */ tryMergeRuns(Sorter sorter, double[] a, int low, int size)3731 private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) { 3732 3733 /* 3734 * The run array is constructed only if initial runs are 3735 * long enough to continue, run[i] then holds start index 3736 * of the i-th sequence of elements in non-descending order. 3737 */ 3738 int[] run = null; 3739 int high = low + size; 3740 int count = 1, last = low; 3741 3742 /* 3743 * Identify all possible runs. 3744 */ 3745 for (int k = low + 1; k < high; ) { 3746 3747 /* 3748 * Find the end index of the current run. 3749 */ 3750 if (a[k - 1] < a[k]) { 3751 3752 // Identify ascending sequence 3753 while (++k < high && a[k - 1] <= a[k]); 3754 3755 } else if (a[k - 1] > a[k]) { 3756 3757 // Identify descending sequence 3758 while (++k < high && a[k - 1] >= a[k]); 3759 3760 // Reverse into ascending order 3761 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 3762 double ai = a[i]; a[i] = a[j]; a[j] = ai; 3763 } 3764 } else { // Identify constant sequence 3765 for (double ak = a[k]; ++k < high && ak == a[k]; ); 3766 3767 if (k < high) { 3768 continue; 3769 } 3770 } 3771 3772 /* 3773 * Check special cases. 3774 */ 3775 if (run == null) { 3776 if (k == high) { 3777 3778 /* 3779 * The array is monotonous sequence, 3780 * and therefore already sorted. 3781 */ 3782 return true; 3783 } 3784 3785 if (k - low < MIN_FIRST_RUN_SIZE) { 3786 3787 /* 3788 * The first run is too small 3789 * to proceed with scanning. 3790 */ 3791 return false; 3792 } 3793 3794 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 3795 run[0] = low; 3796 3797 } else if (a[last - 1] > a[last]) { 3798 3799 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 3800 3801 /* 3802 * The first runs are not long 3803 * enough to continue scanning. 3804 */ 3805 return false; 3806 } 3807 3808 if (++count == MAX_RUN_CAPACITY) { 3809 3810 /* 3811 * Array is not highly structured. 3812 */ 3813 return false; 3814 } 3815 3816 if (count == run.length) { 3817 3818 /* 3819 * Increase capacity of index array. 3820 */ 3821 run = Arrays.copyOf(run, count << 1); 3822 } 3823 } 3824 run[count] = (last = k); 3825 } 3826 3827 /* 3828 * Merge runs of highly structured array. 3829 */ 3830 if (count > 1) { 3831 double[] b; int offset = low; 3832 3833 if (sorter == null || (b = (double[]) sorter.b) == null) { 3834 b = new double[size]; 3835 } else { 3836 offset = sorter.offset; 3837 } 3838 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 3839 } 3840 return true; 3841 } 3842 3843 /** 3844 * Merges the specified runs. 3845 * 3846 * @param a the source array 3847 * @param b the temporary buffer used in merging 3848 * @param offset the start index in the source, inclusive 3849 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 3850 * @param parallel indicates whether merging is performed in parallel 3851 * @param run the start indexes of the runs, inclusive 3852 * @param lo the start index of the first run, inclusive 3853 * @param hi the start index of the last run, inclusive 3854 * @return the destination where runs are merged 3855 */ mergeRuns(double[] a, double[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)3856 private static double[] mergeRuns(double[] a, double[] b, int offset, 3857 int aim, boolean parallel, int[] run, int lo, int hi) { 3858 3859 if (hi - lo == 1) { 3860 if (aim >= 0) { 3861 return a; 3862 } 3863 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 3864 b[--j] = a[--i] 3865 ); 3866 return b; 3867 } 3868 3869 /* 3870 * Split into approximately equal parts. 3871 */ 3872 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 3873 while (run[++mi + 1] <= rmi); 3874 3875 /* 3876 * Merge the left and right parts. 3877 */ 3878 double[] a1, a2; 3879 3880 if (parallel && hi - lo > MIN_RUN_COUNT) { 3881 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 3882 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 3883 a2 = (double[]) merger.getDestination(); 3884 } else { 3885 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 3886 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 3887 } 3888 3889 double[] dst = a1 == a ? b : a; 3890 3891 int k = a1 == a ? run[lo] - offset : run[lo]; 3892 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 3893 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 3894 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 3895 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 3896 3897 if (parallel) { 3898 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 3899 } else { 3900 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 3901 } 3902 return dst; 3903 } 3904 3905 /** 3906 * Merges the sorted parts. 3907 * 3908 * @param merger parallel context 3909 * @param dst the destination where parts are merged 3910 * @param k the start index of the destination, inclusive 3911 * @param a1 the first part 3912 * @param lo1 the start index of the first part, inclusive 3913 * @param hi1 the end index of the first part, exclusive 3914 * @param a2 the second part 3915 * @param lo2 the start index of the second part, inclusive 3916 * @param hi2 the end index of the second part, exclusive 3917 */ mergeParts(Merger merger, double[] dst, int k, double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2)3918 private static void mergeParts(Merger merger, double[] dst, int k, 3919 double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) { 3920 3921 if (merger != null && a1 == a2) { 3922 3923 while (true) { 3924 3925 /* 3926 * The first part must be larger. 3927 */ 3928 if (hi1 - lo1 < hi2 - lo2) { 3929 int lo = lo1; lo1 = lo2; lo2 = lo; 3930 int hi = hi1; hi1 = hi2; hi2 = hi; 3931 } 3932 3933 /* 3934 * Small parts will be merged sequentially. 3935 */ 3936 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 3937 break; 3938 } 3939 3940 /* 3941 * Find the median of the larger part. 3942 */ 3943 int mi1 = (lo1 + hi1) >>> 1; 3944 double key = a1[mi1]; 3945 int mi2 = hi2; 3946 3947 /* 3948 * Partition the smaller part. 3949 */ 3950 for (int loo = lo2; loo < mi2; ) { 3951 int t = (loo + mi2) >>> 1; 3952 3953 if (key > a2[t]) { 3954 loo = t + 1; 3955 } else { 3956 mi2 = t; 3957 } 3958 } 3959 3960 int d = mi2 - lo2 + mi1 - lo1; 3961 3962 /* 3963 * Merge the right sub-parts in parallel. 3964 */ 3965 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 3966 3967 /* 3968 * Process the sub-left parts. 3969 */ 3970 hi1 = mi1; 3971 hi2 = mi2; 3972 } 3973 } 3974 3975 /* 3976 * Merge small parts sequentially. 3977 */ 3978 while (lo1 < hi1 && lo2 < hi2) { 3979 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 3980 } 3981 if (dst != a1 || k < lo1) { 3982 while (lo1 < hi1) { 3983 dst[k++] = a1[lo1++]; 3984 } 3985 } 3986 if (dst != a2 || k < lo2) { 3987 while (lo2 < hi2) { 3988 dst[k++] = a2[lo2++]; 3989 } 3990 } 3991 } 3992 3993 // [class] 3994 3995 /** 3996 * This class implements parallel sorting. 3997 */ 3998 private static final class Sorter extends CountedCompleter<Void> { 3999 private static final long serialVersionUID = 20180818L; 4000 private final Object a, b; 4001 private final int low, size, offset, depth; 4002 4003 private Sorter(CountedCompleter<?> parent, 4004 Object a, Object b, int low, int size, int offset, int depth) { 4005 super(parent); 4006 this.a = a; 4007 this.b = b; 4008 this.low = low; 4009 this.size = size; 4010 this.offset = offset; 4011 this.depth = depth; 4012 } 4013 4014 @Override 4015 public final void compute() { 4016 if (depth < 0) { 4017 setPendingCount(2); 4018 int half = size >> 1; 4019 new Sorter(this, b, a, low, half, offset, depth + 1).fork(); 4020 new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute(); 4021 } else { 4022 if (a instanceof int[]) { 4023 sort(this, (int[]) a, depth, low, low + size); 4024 } else if (a instanceof long[]) { 4025 sort(this, (long[]) a, depth, low, low + size); 4026 } else if (a instanceof float[]) { 4027 sort(this, (float[]) a, depth, low, low + size); 4028 } else if (a instanceof double[]) { 4029 sort(this, (double[]) a, depth, low, low + size); 4030 } else { 4031 throw new IllegalArgumentException( 4032 "Unknown type of array: " + a.getClass().getName()); 4033 } 4034 } 4035 tryComplete(); 4036 } 4037 4038 @Override 4039 public final void onCompletion(CountedCompleter<?> caller) { 4040 if (depth < 0) { 4041 int mi = low + (size >> 1); 4042 boolean src = (depth & 1) == 0; 4043 4044 new Merger(null, 4045 a, 4046 src ? low : low - offset, 4047 b, 4048 src ? low - offset : low, 4049 src ? mi - offset : mi, 4050 b, 4051 src ? mi - offset : mi, 4052 src ? low + size - offset : low + size 4053 ).invoke(); 4054 } 4055 } 4056 4057 private void forkSorter(int depth, int low, int high) { 4058 addToPendingCount(1); 4059 Object a = this.a; // Use local variable for performance 4060 new Sorter(this, a, b, low, high - low, offset, depth).fork(); 4061 } 4062 } 4063 4064 /** 4065 * This class implements parallel merging. 4066 */ 4067 private static final class Merger extends CountedCompleter<Void> { 4068 private static final long serialVersionUID = 20180818L; 4069 private final Object dst, a1, a2; 4070 private final int k, lo1, hi1, lo2, hi2; 4071 4072 private Merger(CountedCompleter<?> parent, Object dst, int k, 4073 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { 4074 super(parent); 4075 this.dst = dst; 4076 this.k = k; 4077 this.a1 = a1; 4078 this.lo1 = lo1; 4079 this.hi1 = hi1; 4080 this.a2 = a2; 4081 this.lo2 = lo2; 4082 this.hi2 = hi2; 4083 } 4084 4085 @Override 4086 public final void compute() { 4087 if (dst instanceof int[]) { 4088 mergeParts(this, (int[]) dst, k, 4089 (int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2); 4090 } else if (dst instanceof long[]) { 4091 mergeParts(this, (long[]) dst, k, 4092 (long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2); 4093 } else if (dst instanceof float[]) { 4094 mergeParts(this, (float[]) dst, k, 4095 (float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2); 4096 } else if (dst instanceof double[]) { 4097 mergeParts(this, (double[]) dst, k, 4098 (double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2); 4099 } else { 4100 throw new IllegalArgumentException( 4101 "Unknown type of array: " + dst.getClass().getName()); 4102 } 4103 propagateCompletion(); 4104 } 4105 4106 private void forkMerger(Object dst, int k, 4107 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { 4108 addToPendingCount(1); 4109 new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork(); 4110 } 4111 } 4112 4113 /** 4114 * This class implements parallel merging of runs. 4115 */ 4116 private static final class RunMerger extends RecursiveTask<Object> { 4117 private static final long serialVersionUID = 20180818L; 4118 private final Object a, b; 4119 private final int[] run; 4120 private final int offset, aim, lo, hi; 4121 4122 private RunMerger(Object a, Object b, int offset, 4123 int aim, int[] run, int lo, int hi) { 4124 this.a = a; 4125 this.b = b; 4126 this.offset = offset; 4127 this.aim = aim; 4128 this.run = run; 4129 this.lo = lo; 4130 this.hi = hi; 4131 } 4132 4133 @Override 4134 protected final Object compute() { 4135 if (a instanceof int[]) { 4136 return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi); 4137 } 4138 if (a instanceof long[]) { 4139 return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi); 4140 } 4141 if (a instanceof float[]) { 4142 return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi); 4143 } 4144 if (a instanceof double[]) { 4145 return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi); 4146 } 4147 throw new IllegalArgumentException( 4148 "Unknown type of array: " + a.getClass().getName()); 4149 } 4150 4151 private RunMerger forkMe() { 4152 fork(); 4153 return this; 4154 } 4155 4156 private Object getDestination() { 4157 join(); 4158 return getRawResult(); 4159 } 4160 } 4161 } 4162