1 /*
2  * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.util;
27 
28 import java.util.concurrent.CountedCompleter;
29 import java.util.concurrent.RecursiveTask;
30 
31 /**
32  * This class implements powerful and fully optimized versions, both
33  * sequential and parallel, of the Dual-Pivot Quicksort algorithm by
34  * Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm
35  * offers O(n log(n)) performance on all data sets, and is typically
36  * faster than traditional (one-pivot) Quicksort implementations.
37  *
38  * There are also additional algorithms, invoked from the Dual-Pivot
39  * Quicksort, such as mixed insertion sort, merging of runs and heap
40  * sort, counting sort and parallel merge sort.
41  *
42  * @author Vladimir Yaroslavskiy
43  * @author Jon Bentley
44  * @author Josh Bloch
45  * @author Doug Lea
46  *
47  * @version 2018.08.18
48  *
49  * @since 1.7 * 14
50  */
51 final class DualPivotQuicksort {
52 
53     /**
54      * Prevents instantiation.
55      */
DualPivotQuicksort()56     private DualPivotQuicksort() {}
57 
58     /**
59      * Max array size to use mixed insertion sort.
60      */
61     private static final int MAX_MIXED_INSERTION_SORT_SIZE = 65;
62 
63     /**
64      * Max array size to use insertion sort.
65      */
66     private static final int MAX_INSERTION_SORT_SIZE = 44;
67 
68     /**
69      * Min array size to perform sorting in parallel.
70      */
71     private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10;
72 
73     /**
74      * Min array size to try merging of runs.
75      */
76     private static final int MIN_TRY_MERGE_SIZE = 4 << 10;
77 
78     /**
79      * Min size of the first run to continue with scanning.
80      */
81     private static final int MIN_FIRST_RUN_SIZE = 16;
82 
83     /**
84      * Min factor for the first runs to continue scanning.
85      */
86     private static final int MIN_FIRST_RUNS_FACTOR = 7;
87 
88     /**
89      * Max capacity of the index array for tracking runs.
90      */
91     private static final int MAX_RUN_CAPACITY = 5 << 10;
92 
93     /**
94      * Min number of runs, required by parallel merging.
95      */
96     private static final int MIN_RUN_COUNT = 4;
97 
98     /**
99      * Min array size to use parallel merging of parts.
100      */
101     private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10;
102 
103     /**
104      * Min size of a byte array to use counting sort.
105      */
106     private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64;
107 
108     /**
109      * Min size of a short or char array to use counting sort.
110      */
111     private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750;
112 
113     /**
114      * Threshold of mixed insertion sort is incremented by this value.
115      */
116     private static final int DELTA = 3 << 1;
117 
118     /**
119      * Max recursive partitioning depth before using heap sort.
120      */
121     private static final int MAX_RECURSION_DEPTH = 64 * DELTA;
122 
123     /**
124      * Calculates the double depth of parallel merging.
125      * Depth is negative, if tasks split before sorting.
126      *
127      * @param parallelism the parallelism level
128      * @param size the target size
129      * @return the depth of parallel merging
130      */
getDepth(int parallelism, int size)131     private static int getDepth(int parallelism, int size) {
132         int depth = 0;
133 
134         while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) {
135             depth -= 2;
136         }
137         return depth;
138     }
139 
140     /**
141      * Sorts the specified range of the array using parallel merge
142      * sort and/or Dual-Pivot Quicksort.
143      *
144      * To balance the faster splitting and parallelism of merge sort
145      * with the faster element partitioning of Quicksort, ranges are
146      * subdivided in tiers such that, if there is enough parallelism,
147      * the four-way parallel merge is started, still ensuring enough
148      * parallelism to process the partitions.
149      *
150      * @param a the array to be sorted
151      * @param parallelism the parallelism level
152      * @param low the index of the first element, inclusive, to be sorted
153      * @param high the index of the last element, exclusive, to be sorted
154      */
sort(int[] a, int parallelism, int low, int high)155     static void sort(int[] a, int parallelism, int low, int high) {
156         int size = high - low;
157 
158         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
159             int depth = getDepth(parallelism, size >> 12);
160             int[] b = depth == 0 ? null : new int[size];
161             new Sorter(null, a, b, low, size, low, depth).invoke();
162         } else {
163             sort(null, a, 0, low, high);
164         }
165     }
166 
167     /**
168      * Sorts the specified array using the Dual-Pivot Quicksort and/or
169      * other sorts in special-cases, possibly with parallel partitions.
170      *
171      * @param sorter parallel context
172      * @param a the array to be sorted
173      * @param bits the combination of recursion depth and bit flag, where
174      *        the right bit "0" indicates that array is the leftmost part
175      * @param low the index of the first element, inclusive, to be sorted
176      * @param high the index of the last element, exclusive, to be sorted
177      */
sort(Sorter sorter, int[] a, int bits, int low, int high)178     static void sort(Sorter sorter, int[] a, int bits, int low, int high) {
179         while (true) {
180             int end = high - 1, size = high - low;
181 
182             /*
183              * Run mixed insertion sort on small non-leftmost parts.
184              */
185             if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) {
186                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
187                 return;
188             }
189 
190             /*
191              * Invoke insertion sort on small leftmost part.
192              */
193             if (size < MAX_INSERTION_SORT_SIZE) {
194                 insertionSort(a, low, high);
195                 return;
196             }
197 
198             /*
199              * Check if the whole array or large non-leftmost
200              * parts are nearly sorted and then merge runs.
201              */
202             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
203                     && tryMergeRuns(sorter, a, low, size)) {
204                 return;
205             }
206 
207             /*
208              * Switch to heap sort if execution
209              * time is becoming quadratic.
210              */
211             if ((bits += DELTA) > MAX_RECURSION_DEPTH) {
212                 heapSort(a, low, high);
213                 return;
214             }
215 
216             /*
217              * Use an inexpensive approximation of the golden ratio
218              * to select five sample elements and determine pivots.
219              */
220             int step = (size >> 3) * 3 + 3;
221 
222             /*
223              * Five elements around (and including) the central element
224              * will be used for pivot selection as described below. The
225              * unequal choice of spacing these elements was empirically
226              * determined to work well on a wide variety of inputs.
227              */
228             int e1 = low + step;
229             int e5 = end - step;
230             int e3 = (e1 + e5) >>> 1;
231             int e2 = (e1 + e3) >>> 1;
232             int e4 = (e3 + e5) >>> 1;
233             int a3 = a[e3];
234 
235             /*
236              * Sort these elements in place by the combination
237              * of 4-element sorting network and insertion sort.
238              *
239              *    5 ------o-----------o------------
240              *            |           |
241              *    4 ------|-----o-----o-----o------
242              *            |     |           |
243              *    2 ------o-----|-----o-----o------
244              *                  |     |
245              *    1 ------------o-----o------------
246              */
247             if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
248             if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
249             if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
250             if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
251             if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
252 
253             if (a3 < a[e2]) {
254                 if (a3 < a[e1]) {
255                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
256                 } else {
257                     a[e3] = a[e2]; a[e2] = a3;
258                 }
259             } else if (a3 > a[e4]) {
260                 if (a3 > a[e5]) {
261                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
262                 } else {
263                     a[e3] = a[e4]; a[e4] = a3;
264                 }
265             }
266 
267             // Pointers
268             int lower = low; // The index of the last element of the left part
269             int upper = end; // The index of the first element of the right part
270 
271             /*
272              * Partitioning with 2 pivots in case of different elements.
273              */
274             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
275 
276                 /*
277                  * Use the first and fifth of the five sorted elements as
278                  * the pivots. These values are inexpensive approximation
279                  * of tertiles. Note, that pivot1 < pivot2.
280                  */
281                 int pivot1 = a[e1];
282                 int pivot2 = a[e5];
283 
284                 /*
285                  * The first and the last elements to be sorted are moved
286                  * to the locations formerly occupied by the pivots. When
287                  * partitioning is completed, the pivots are swapped back
288                  * into their final positions, and excluded from the next
289                  * subsequent sorting.
290                  */
291                 a[e1] = a[lower];
292                 a[e5] = a[upper];
293 
294                 /*
295                  * Skip elements, which are less or greater than the pivots.
296                  */
297                 while (a[++lower] < pivot1);
298                 while (a[--upper] > pivot2);
299 
300                 /*
301                  * Backward 3-interval partitioning
302                  *
303                  *   left part                 central part          right part
304                  * +------------------------------------------------------------+
305                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
306                  * +------------------------------------------------------------+
307                  *             ^       ^                            ^
308                  *             |       |                            |
309                  *           lower     k                          upper
310                  *
311                  * Invariants:
312                  *
313                  *              all in (low, lower] < pivot1
314                  *    pivot1 <= all in (k, upper)  <= pivot2
315                  *              all in [upper, end) > pivot2
316                  *
317                  * Pointer k is the last index of ?-part
318                  */
319                 for (int unused = --lower, k = ++upper; --k > lower; ) {
320                     int ak = a[k];
321 
322                     if (ak < pivot1) { // Move a[k] to the left side
323                         while (lower < k) {
324                             if (a[++lower] >= pivot1) {
325                                 if (a[lower] > pivot2) {
326                                     a[k] = a[--upper];
327                                     a[upper] = a[lower];
328                                 } else {
329                                     a[k] = a[lower];
330                                 }
331                                 a[lower] = ak;
332                                 break;
333                             }
334                         }
335                     } else if (ak > pivot2) { // Move a[k] to the right side
336                         a[k] = a[--upper];
337                         a[upper] = ak;
338                     }
339                 }
340 
341                 /*
342                  * Swap the pivots into their final positions.
343                  */
344                 a[low] = a[lower]; a[lower] = pivot1;
345                 a[end] = a[upper]; a[upper] = pivot2;
346 
347                 /*
348                  * Sort non-left parts recursively (possibly in parallel),
349                  * excluding known pivots.
350                  */
351                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
352                     sorter.forkSorter(bits | 1, lower + 1, upper);
353                     sorter.forkSorter(bits | 1, upper + 1, high);
354                 } else {
355                     sort(sorter, a, bits | 1, lower + 1, upper);
356                     sort(sorter, a, bits | 1, upper + 1, high);
357                 }
358 
359             } else { // Use single pivot in case of many equal elements
360 
361                 /*
362                  * Use the third of the five sorted elements as the pivot.
363                  * This value is inexpensive approximation of the median.
364                  */
365                 int pivot = a[e3];
366 
367                 /*
368                  * The first element to be sorted is moved to the
369                  * location formerly occupied by the pivot. After
370                  * completion of partitioning the pivot is swapped
371                  * back into its final position, and excluded from
372                  * the next subsequent sorting.
373                  */
374                 a[e3] = a[lower];
375 
376                 /*
377                  * Traditional 3-way (Dutch National Flag) partitioning
378                  *
379                  *   left part                 central part    right part
380                  * +------------------------------------------------------+
381                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
382                  * +------------------------------------------------------+
383                  *              ^           ^                ^
384                  *              |           |                |
385                  *            lower         k              upper
386                  *
387                  * Invariants:
388                  *
389                  *   all in (low, lower] < pivot
390                  *   all in (k, upper)  == pivot
391                  *   all in [upper, end] > pivot
392                  *
393                  * Pointer k is the last index of ?-part
394                  */
395                 for (int k = ++upper; --k > lower; ) {
396                     int ak = a[k];
397 
398                     if (ak != pivot) {
399                         a[k] = pivot;
400 
401                         if (ak < pivot) { // Move a[k] to the left side
402                             while (a[++lower] < pivot);
403 
404                             if (a[lower] > pivot) {
405                                 a[--upper] = a[lower];
406                             }
407                             a[lower] = ak;
408                         } else { // ak > pivot - Move a[k] to the right side
409                             a[--upper] = ak;
410                         }
411                     }
412                 }
413 
414                 /*
415                  * Swap the pivot into its final position.
416                  */
417                 a[low] = a[lower]; a[lower] = pivot;
418 
419                 /*
420                  * Sort the right part (possibly in parallel), excluding
421                  * known pivot. All elements from the central part are
422                  * equal and therefore already sorted.
423                  */
424                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
425                     sorter.forkSorter(bits | 1, upper, high);
426                 } else {
427                     sort(sorter, a, bits | 1, upper, high);
428                 }
429             }
430             high = lower; // Iterate along the left part
431         }
432     }
433 
434     /**
435      * Sorts the specified range of the array using mixed insertion sort.
436      *
437      * Mixed insertion sort is combination of simple insertion sort,
438      * pin insertion sort and pair insertion sort.
439      *
440      * In the context of Dual-Pivot Quicksort, the pivot element
441      * from the left part plays the role of sentinel, because it
442      * is less than any elements from the given part. Therefore,
443      * expensive check of the left range can be skipped on each
444      * iteration unless it is the leftmost call.
445      *
446      * @param a the array to be sorted
447      * @param low the index of the first element, inclusive, to be sorted
448      * @param end the index of the last element for simple insertion sort
449      * @param high the index of the last element, exclusive, to be sorted
450      */
mixedInsertionSort(int[] a, int low, int end, int high)451     private static void mixedInsertionSort(int[] a, int low, int end, int high) {
452         if (end == high) {
453 
454             /*
455              * Invoke simple insertion sort on tiny array.
456              */
457             for (int i; ++low < end; ) {
458                 int ai = a[i = low];
459 
460                 while (ai < a[--i]) {
461                     a[i + 1] = a[i];
462                 }
463                 a[i + 1] = ai;
464             }
465         } else {
466 
467             /*
468              * Start with pin insertion sort on small part.
469              *
470              * Pin insertion sort is extended simple insertion sort.
471              * The main idea of this sort is to put elements larger
472              * than an element called pin to the end of array (the
473              * proper area for such elements). It avoids expensive
474              * movements of these elements through the whole array.
475              */
476             int pin = a[end];
477 
478             for (int i, p = high; ++low < end; ) {
479                 int ai = a[i = low];
480 
481                 if (ai < a[i - 1]) { // Small element
482 
483                     /*
484                      * Insert small element into sorted part.
485                      */
486                     a[i] = a[--i];
487 
488                     while (ai < a[--i]) {
489                         a[i + 1] = a[i];
490                     }
491                     a[i + 1] = ai;
492 
493                 } else if (p > i && ai > pin) { // Large element
494 
495                     /*
496                      * Find element smaller than pin.
497                      */
498                     while (a[--p] > pin);
499 
500                     /*
501                      * Swap it with large element.
502                      */
503                     if (p > i) {
504                         ai = a[p];
505                         a[p] = a[i];
506                     }
507 
508                     /*
509                      * Insert small element into sorted part.
510                      */
511                     while (ai < a[--i]) {
512                         a[i + 1] = a[i];
513                     }
514                     a[i + 1] = ai;
515                 }
516             }
517 
518             /*
519              * Continue with pair insertion sort on remain part.
520              */
521             for (int i; low < high; ++low) {
522                 int a1 = a[i = low], a2 = a[++low];
523 
524                 /*
525                  * Insert two elements per iteration: at first, insert the
526                  * larger element and then insert the smaller element, but
527                  * from the position where the larger element was inserted.
528                  */
529                 if (a1 > a2) {
530 
531                     while (a1 < a[--i]) {
532                         a[i + 2] = a[i];
533                     }
534                     a[++i + 1] = a1;
535 
536                     while (a2 < a[--i]) {
537                         a[i + 1] = a[i];
538                     }
539                     a[i + 1] = a2;
540 
541                 } else if (a1 < a[i - 1]) {
542 
543                     while (a2 < a[--i]) {
544                         a[i + 2] = a[i];
545                     }
546                     a[++i + 1] = a2;
547 
548                     while (a1 < a[--i]) {
549                         a[i + 1] = a[i];
550                     }
551                     a[i + 1] = a1;
552                 }
553             }
554         }
555     }
556 
557     /**
558      * Sorts the specified range of the array using insertion sort.
559      *
560      * @param a the array to be sorted
561      * @param low the index of the first element, inclusive, to be sorted
562      * @param high the index of the last element, exclusive, to be sorted
563      */
insertionSort(int[] a, int low, int high)564     private static void insertionSort(int[] a, int low, int high) {
565         for (int i, k = low; ++k < high; ) {
566             int ai = a[i = k];
567 
568             if (ai < a[i - 1]) {
569                 while (--i >= low && ai < a[i]) {
570                     a[i + 1] = a[i];
571                 }
572                 a[i + 1] = ai;
573             }
574         }
575     }
576 
577     /**
578      * Sorts the specified range of the array using heap sort.
579      *
580      * @param a the array to be sorted
581      * @param low the index of the first element, inclusive, to be sorted
582      * @param high the index of the last element, exclusive, to be sorted
583      */
heapSort(int[] a, int low, int high)584     private static void heapSort(int[] a, int low, int high) {
585         for (int k = (low + high) >>> 1; k > low; ) {
586             pushDown(a, --k, a[k], low, high);
587         }
588         while (--high > low) {
589             int max = a[low];
590             pushDown(a, low, a[high], low, high);
591             a[high] = max;
592         }
593     }
594 
595     /**
596      * Pushes specified element down during heap sort.
597      *
598      * @param a the given array
599      * @param p the start index
600      * @param value the given element
601      * @param low the index of the first element, inclusive, to be sorted
602      * @param high the index of the last element, exclusive, to be sorted
603      */
pushDown(int[] a, int p, int value, int low, int high)604     private static void pushDown(int[] a, int p, int value, int low, int high) {
605         for (int k ;; a[p] = a[p = k]) {
606             k = (p << 1) - low + 2; // Index of the right child
607 
608             if (k > high) {
609                 break;
610             }
611             if (k == high || a[k] < a[k - 1]) {
612                 --k;
613             }
614             if (a[k] <= value) {
615                 break;
616             }
617         }
618         a[p] = value;
619     }
620 
621     /**
622      * Tries to sort the specified range of the array.
623      *
624      * @param sorter parallel context
625      * @param a the array to be sorted
626      * @param low the index of the first element to be sorted
627      * @param size the array size
628      * @return true if finally sorted, false otherwise
629      */
tryMergeRuns(Sorter sorter, int[] a, int low, int size)630     private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) {
631 
632         /*
633          * The run array is constructed only if initial runs are
634          * long enough to continue, run[i] then holds start index
635          * of the i-th sequence of elements in non-descending order.
636          */
637         int[] run = null;
638         int high = low + size;
639         int count = 1, last = low;
640 
641         /*
642          * Identify all possible runs.
643          */
644         for (int k = low + 1; k < high; ) {
645 
646             /*
647              * Find the end index of the current run.
648              */
649             if (a[k - 1] < a[k]) {
650 
651                 // Identify ascending sequence
652                 while (++k < high && a[k - 1] <= a[k]);
653 
654             } else if (a[k - 1] > a[k]) {
655 
656                 // Identify descending sequence
657                 while (++k < high && a[k - 1] >= a[k]);
658 
659                 // Reverse into ascending order
660                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
661                     int ai = a[i]; a[i] = a[j]; a[j] = ai;
662                 }
663             } else { // Identify constant sequence
664                 for (int ak = a[k]; ++k < high && ak == a[k]; );
665 
666                 if (k < high) {
667                     continue;
668                 }
669             }
670 
671             /*
672              * Check special cases.
673              */
674             if (run == null) {
675                 if (k == high) {
676 
677                     /*
678                      * The array is monotonous sequence,
679                      * and therefore already sorted.
680                      */
681                     return true;
682                 }
683 
684                 if (k - low < MIN_FIRST_RUN_SIZE) {
685 
686                     /*
687                      * The first run is too small
688                      * to proceed with scanning.
689                      */
690                     return false;
691                 }
692 
693                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
694                 run[0] = low;
695 
696             } else if (a[last - 1] > a[last]) {
697 
698                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
699 
700                     /*
701                      * The first runs are not long
702                      * enough to continue scanning.
703                      */
704                     return false;
705                 }
706 
707                 if (++count == MAX_RUN_CAPACITY) {
708 
709                     /*
710                      * Array is not highly structured.
711                      */
712                     return false;
713                 }
714 
715                 if (count == run.length) {
716 
717                     /*
718                      * Increase capacity of index array.
719                      */
720                     run = Arrays.copyOf(run, count << 1);
721                 }
722             }
723             run[count] = (last = k);
724         }
725 
726         /*
727          * Merge runs of highly structured array.
728          */
729         if (count > 1) {
730             int[] b; int offset = low;
731 
732             if (sorter == null || (b = (int[]) sorter.b) == null) {
733                 b = new int[size];
734             } else {
735                 offset = sorter.offset;
736             }
737             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
738         }
739         return true;
740     }
741 
742     /**
743      * Merges the specified runs.
744      *
745      * @param a the source array
746      * @param b the temporary buffer used in merging
747      * @param offset the start index in the source, inclusive
748      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
749      * @param parallel indicates whether merging is performed in parallel
750      * @param run the start indexes of the runs, inclusive
751      * @param lo the start index of the first run, inclusive
752      * @param hi the start index of the last run, inclusive
753      * @return the destination where runs are merged
754      */
mergeRuns(int[] a, int[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)755     private static int[] mergeRuns(int[] a, int[] b, int offset,
756             int aim, boolean parallel, int[] run, int lo, int hi) {
757 
758         if (hi - lo == 1) {
759             if (aim >= 0) {
760                 return a;
761             }
762             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
763                 b[--j] = a[--i]
764             );
765             return b;
766         }
767 
768         /*
769          * Split into approximately equal parts.
770          */
771         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
772         while (run[++mi + 1] <= rmi);
773 
774         /*
775          * Merge the left and right parts.
776          */
777         int[] a1, a2;
778 
779         if (parallel && hi - lo > MIN_RUN_COUNT) {
780             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
781             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
782             a2 = (int[]) merger.getDestination();
783         } else {
784             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
785             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
786         }
787 
788         int[] dst = a1 == a ? b : a;
789 
790         int k   = a1 == a ? run[lo] - offset : run[lo];
791         int lo1 = a1 == b ? run[lo] - offset : run[lo];
792         int hi1 = a1 == b ? run[mi] - offset : run[mi];
793         int lo2 = a2 == b ? run[mi] - offset : run[mi];
794         int hi2 = a2 == b ? run[hi] - offset : run[hi];
795 
796         if (parallel) {
797             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
798         } else {
799             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
800         }
801         return dst;
802     }
803 
804     /**
805      * Merges the sorted parts.
806      *
807      * @param merger parallel context
808      * @param dst the destination where parts are merged
809      * @param k the start index of the destination, inclusive
810      * @param a1 the first part
811      * @param lo1 the start index of the first part, inclusive
812      * @param hi1 the end index of the first part, exclusive
813      * @param a2 the second part
814      * @param lo2 the start index of the second part, inclusive
815      * @param hi2 the end index of the second part, exclusive
816      */
mergeParts(Merger merger, int[] dst, int k, int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2)817     private static void mergeParts(Merger merger, int[] dst, int k,
818             int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) {
819 
820         if (merger != null && a1 == a2) {
821 
822             while (true) {
823 
824                 /*
825                  * The first part must be larger.
826                  */
827                 if (hi1 - lo1 < hi2 - lo2) {
828                     int lo = lo1; lo1 = lo2; lo2 = lo;
829                     int hi = hi1; hi1 = hi2; hi2 = hi;
830                 }
831 
832                 /*
833                  * Small parts will be merged sequentially.
834                  */
835                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
836                     break;
837                 }
838 
839                 /*
840                  * Find the median of the larger part.
841                  */
842                 int mi1 = (lo1 + hi1) >>> 1;
843                 int key = a1[mi1];
844                 int mi2 = hi2;
845 
846                 /*
847                  * Partition the smaller part.
848                  */
849                 for (int loo = lo2; loo < mi2; ) {
850                     int t = (loo + mi2) >>> 1;
851 
852                     if (key > a2[t]) {
853                         loo = t + 1;
854                     } else {
855                         mi2 = t;
856                     }
857                 }
858 
859                 int d = mi2 - lo2 + mi1 - lo1;
860 
861                 /*
862                  * Merge the right sub-parts in parallel.
863                  */
864                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
865 
866                 /*
867                  * Process the sub-left parts.
868                  */
869                 hi1 = mi1;
870                 hi2 = mi2;
871             }
872         }
873 
874         /*
875          * Merge small parts sequentially.
876          */
877         while (lo1 < hi1 && lo2 < hi2) {
878             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
879         }
880         if (dst != a1 || k < lo1) {
881             while (lo1 < hi1) {
882                 dst[k++] = a1[lo1++];
883             }
884         }
885         if (dst != a2 || k < lo2) {
886             while (lo2 < hi2) {
887                 dst[k++] = a2[lo2++];
888             }
889         }
890     }
891 
892 // [long]
893 
894     /**
895      * Sorts the specified range of the array using parallel merge
896      * sort and/or Dual-Pivot Quicksort.
897      *
898      * To balance the faster splitting and parallelism of merge sort
899      * with the faster element partitioning of Quicksort, ranges are
900      * subdivided in tiers such that, if there is enough parallelism,
901      * the four-way parallel merge is started, still ensuring enough
902      * parallelism to process the partitions.
903      *
904      * @param a the array to be sorted
905      * @param parallelism the parallelism level
906      * @param low the index of the first element, inclusive, to be sorted
907      * @param high the index of the last element, exclusive, to be sorted
908      */
909     static void sort(long[] a, int parallelism, int low, int high) {
910         int size = high - low;
911 
912         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
913             int depth = getDepth(parallelism, size >> 12);
914             long[] b = depth == 0 ? null : new long[size];
915             new Sorter(null, a, b, low, size, low, depth).invoke();
916         } else {
917             sort(null, a, 0, low, high);
918         }
919     }
920 
921     /**
922      * Sorts the specified array using the Dual-Pivot Quicksort and/or
923      * other sorts in special-cases, possibly with parallel partitions.
924      *
925      * @param sorter parallel context
926      * @param a the array to be sorted
927      * @param bits the combination of recursion depth and bit flag, where
928      *        the right bit "0" indicates that array is the leftmost part
929      * @param low the index of the first element, inclusive, to be sorted
930      * @param high the index of the last element, exclusive, to be sorted
931      */
932     static void sort(Sorter sorter, long[] a, int bits, int low, int high) {
933         while (true) {
934             int end = high - 1, size = high - low;
935 
936             /*
937              * Run mixed insertion sort on small non-leftmost parts.
938              */
939             if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) {
940                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
941                 return;
942             }
943 
944             /*
945              * Invoke insertion sort on small leftmost part.
946              */
947             if (size < MAX_INSERTION_SORT_SIZE) {
948                 insertionSort(a, low, high);
949                 return;
950             }
951 
952             /*
953              * Check if the whole array or large non-leftmost
954              * parts are nearly sorted and then merge runs.
955              */
956             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
957                     && tryMergeRuns(sorter, a, low, size)) {
958                 return;
959             }
960 
961             /*
962              * Switch to heap sort if execution
963              * time is becoming quadratic.
964              */
965             if ((bits += DELTA) > MAX_RECURSION_DEPTH) {
966                 heapSort(a, low, high);
967                 return;
968             }
969 
970             /*
971              * Use an inexpensive approximation of the golden ratio
972              * to select five sample elements and determine pivots.
973              */
974             int step = (size >> 3) * 3 + 3;
975 
976             /*
977              * Five elements around (and including) the central element
978              * will be used for pivot selection as described below. The
979              * unequal choice of spacing these elements was empirically
980              * determined to work well on a wide variety of inputs.
981              */
982             int e1 = low + step;
983             int e5 = end - step;
984             int e3 = (e1 + e5) >>> 1;
985             int e2 = (e1 + e3) >>> 1;
986             int e4 = (e3 + e5) >>> 1;
987             long a3 = a[e3];
988 
989             /*
990              * Sort these elements in place by the combination
991              * of 4-element sorting network and insertion sort.
992              *
993              *    5 ------o-----------o------------
994              *            |           |
995              *    4 ------|-----o-----o-----o------
996              *            |     |           |
997              *    2 ------o-----|-----o-----o------
998              *                  |     |
999              *    1 ------------o-----o------------
1000              */
1001             if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
1002             if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
1003             if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
1004             if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
1005             if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
1006 
1007             if (a3 < a[e2]) {
1008                 if (a3 < a[e1]) {
1009                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
1010                 } else {
1011                     a[e3] = a[e2]; a[e2] = a3;
1012                 }
1013             } else if (a3 > a[e4]) {
1014                 if (a3 > a[e5]) {
1015                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
1016                 } else {
1017                     a[e3] = a[e4]; a[e4] = a3;
1018                 }
1019             }
1020 
1021             // Pointers
1022             int lower = low; // The index of the last element of the left part
1023             int upper = end; // The index of the first element of the right part
1024 
1025             /*
1026              * Partitioning with 2 pivots in case of different elements.
1027              */
1028             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
1029 
1030                 /*
1031                  * Use the first and fifth of the five sorted elements as
1032                  * the pivots. These values are inexpensive approximation
1033                  * of tertiles. Note, that pivot1 < pivot2.
1034                  */
1035                 long pivot1 = a[e1];
1036                 long pivot2 = a[e5];
1037 
1038                 /*
1039                  * The first and the last elements to be sorted are moved
1040                  * to the locations formerly occupied by the pivots. When
1041                  * partitioning is completed, the pivots are swapped back
1042                  * into their final positions, and excluded from the next
1043                  * subsequent sorting.
1044                  */
1045                 a[e1] = a[lower];
1046                 a[e5] = a[upper];
1047 
1048                 /*
1049                  * Skip elements, which are less or greater than the pivots.
1050                  */
1051                 while (a[++lower] < pivot1);
1052                 while (a[--upper] > pivot2);
1053 
1054                 /*
1055                  * Backward 3-interval partitioning
1056                  *
1057                  *   left part                 central part          right part
1058                  * +------------------------------------------------------------+
1059                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
1060                  * +------------------------------------------------------------+
1061                  *             ^       ^                            ^
1062                  *             |       |                            |
1063                  *           lower     k                          upper
1064                  *
1065                  * Invariants:
1066                  *
1067                  *              all in (low, lower] < pivot1
1068                  *    pivot1 <= all in (k, upper)  <= pivot2
1069                  *              all in [upper, end) > pivot2
1070                  *
1071                  * Pointer k is the last index of ?-part
1072                  */
1073                 for (int unused = --lower, k = ++upper; --k > lower; ) {
1074                     long ak = a[k];
1075 
1076                     if (ak < pivot1) { // Move a[k] to the left side
1077                         while (lower < k) {
1078                             if (a[++lower] >= pivot1) {
1079                                 if (a[lower] > pivot2) {
1080                                     a[k] = a[--upper];
1081                                     a[upper] = a[lower];
1082                                 } else {
1083                                     a[k] = a[lower];
1084                                 }
1085                                 a[lower] = ak;
1086                                 break;
1087                             }
1088                         }
1089                     } else if (ak > pivot2) { // Move a[k] to the right side
1090                         a[k] = a[--upper];
1091                         a[upper] = ak;
1092                     }
1093                 }
1094 
1095                 /*
1096                  * Swap the pivots into their final positions.
1097                  */
1098                 a[low] = a[lower]; a[lower] = pivot1;
1099                 a[end] = a[upper]; a[upper] = pivot2;
1100 
1101                 /*
1102                  * Sort non-left parts recursively (possibly in parallel),
1103                  * excluding known pivots.
1104                  */
1105                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
1106                     sorter.forkSorter(bits | 1, lower + 1, upper);
1107                     sorter.forkSorter(bits | 1, upper + 1, high);
1108                 } else {
1109                     sort(sorter, a, bits | 1, lower + 1, upper);
1110                     sort(sorter, a, bits | 1, upper + 1, high);
1111                 }
1112 
1113             } else { // Use single pivot in case of many equal elements
1114 
1115                 /*
1116                  * Use the third of the five sorted elements as the pivot.
1117                  * This value is inexpensive approximation of the median.
1118                  */
1119                 long pivot = a[e3];
1120 
1121                 /*
1122                  * The first element to be sorted is moved to the
1123                  * location formerly occupied by the pivot. After
1124                  * completion of partitioning the pivot is swapped
1125                  * back into its final position, and excluded from
1126                  * the next subsequent sorting.
1127                  */
1128                 a[e3] = a[lower];
1129 
1130                 /*
1131                  * Traditional 3-way (Dutch National Flag) partitioning
1132                  *
1133                  *   left part                 central part    right part
1134                  * +------------------------------------------------------+
1135                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
1136                  * +------------------------------------------------------+
1137                  *              ^           ^                ^
1138                  *              |           |                |
1139                  *            lower         k              upper
1140                  *
1141                  * Invariants:
1142                  *
1143                  *   all in (low, lower] < pivot
1144                  *   all in (k, upper)  == pivot
1145                  *   all in [upper, end] > pivot
1146                  *
1147                  * Pointer k is the last index of ?-part
1148                  */
1149                 for (int k = ++upper; --k > lower; ) {
1150                     long ak = a[k];
1151 
1152                     if (ak != pivot) {
1153                         a[k] = pivot;
1154 
1155                         if (ak < pivot) { // Move a[k] to the left side
1156                             while (a[++lower] < pivot);
1157 
1158                             if (a[lower] > pivot) {
1159                                 a[--upper] = a[lower];
1160                             }
1161                             a[lower] = ak;
1162                         } else { // ak > pivot - Move a[k] to the right side
1163                             a[--upper] = ak;
1164                         }
1165                     }
1166                 }
1167 
1168                 /*
1169                  * Swap the pivot into its final position.
1170                  */
1171                 a[low] = a[lower]; a[lower] = pivot;
1172 
1173                 /*
1174                  * Sort the right part (possibly in parallel), excluding
1175                  * known pivot. All elements from the central part are
1176                  * equal and therefore already sorted.
1177                  */
1178                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
1179                     sorter.forkSorter(bits | 1, upper, high);
1180                 } else {
1181                     sort(sorter, a, bits | 1, upper, high);
1182                 }
1183             }
1184             high = lower; // Iterate along the left part
1185         }
1186     }
1187 
1188     /**
1189      * Sorts the specified range of the array using mixed insertion sort.
1190      *
1191      * Mixed insertion sort is combination of simple insertion sort,
1192      * pin insertion sort and pair insertion sort.
1193      *
1194      * In the context of Dual-Pivot Quicksort, the pivot element
1195      * from the left part plays the role of sentinel, because it
1196      * is less than any elements from the given part. Therefore,
1197      * expensive check of the left range can be skipped on each
1198      * iteration unless it is the leftmost call.
1199      *
1200      * @param a the array to be sorted
1201      * @param low the index of the first element, inclusive, to be sorted
1202      * @param end the index of the last element for simple insertion sort
1203      * @param high the index of the last element, exclusive, to be sorted
1204      */
mixedInsertionSort(long[] a, int low, int end, int high)1205     private static void mixedInsertionSort(long[] a, int low, int end, int high) {
1206         if (end == high) {
1207 
1208             /*
1209              * Invoke simple insertion sort on tiny array.
1210              */
1211             for (int i; ++low < end; ) {
1212                 long ai = a[i = low];
1213 
1214                 while (ai < a[--i]) {
1215                     a[i + 1] = a[i];
1216                 }
1217                 a[i + 1] = ai;
1218             }
1219         } else {
1220 
1221             /*
1222              * Start with pin insertion sort on small part.
1223              *
1224              * Pin insertion sort is extended simple insertion sort.
1225              * The main idea of this sort is to put elements larger
1226              * than an element called pin to the end of array (the
1227              * proper area for such elements). It avoids expensive
1228              * movements of these elements through the whole array.
1229              */
1230             long pin = a[end];
1231 
1232             for (int i, p = high; ++low < end; ) {
1233                 long ai = a[i = low];
1234 
1235                 if (ai < a[i - 1]) { // Small element
1236 
1237                     /*
1238                      * Insert small element into sorted part.
1239                      */
1240                     a[i] = a[--i];
1241 
1242                     while (ai < a[--i]) {
1243                         a[i + 1] = a[i];
1244                     }
1245                     a[i + 1] = ai;
1246 
1247                 } else if (p > i && ai > pin) { // Large element
1248 
1249                     /*
1250                      * Find element smaller than pin.
1251                      */
1252                     while (a[--p] > pin);
1253 
1254                     /*
1255                      * Swap it with large element.
1256                      */
1257                     if (p > i) {
1258                         ai = a[p];
1259                         a[p] = a[i];
1260                     }
1261 
1262                     /*
1263                      * Insert small element into sorted part.
1264                      */
1265                     while (ai < a[--i]) {
1266                         a[i + 1] = a[i];
1267                     }
1268                     a[i + 1] = ai;
1269                 }
1270             }
1271 
1272             /*
1273              * Continue with pair insertion sort on remain part.
1274              */
1275             for (int i; low < high; ++low) {
1276                 long a1 = a[i = low], a2 = a[++low];
1277 
1278                 /*
1279                  * Insert two elements per iteration: at first, insert the
1280                  * larger element and then insert the smaller element, but
1281                  * from the position where the larger element was inserted.
1282                  */
1283                 if (a1 > a2) {
1284 
1285                     while (a1 < a[--i]) {
1286                         a[i + 2] = a[i];
1287                     }
1288                     a[++i + 1] = a1;
1289 
1290                     while (a2 < a[--i]) {
1291                         a[i + 1] = a[i];
1292                     }
1293                     a[i + 1] = a2;
1294 
1295                 } else if (a1 < a[i - 1]) {
1296 
1297                     while (a2 < a[--i]) {
1298                         a[i + 2] = a[i];
1299                     }
1300                     a[++i + 1] = a2;
1301 
1302                     while (a1 < a[--i]) {
1303                         a[i + 1] = a[i];
1304                     }
1305                     a[i + 1] = a1;
1306                 }
1307             }
1308         }
1309     }
1310 
1311     /**
1312      * Sorts the specified range of the array using insertion sort.
1313      *
1314      * @param a the array to be sorted
1315      * @param low the index of the first element, inclusive, to be sorted
1316      * @param high the index of the last element, exclusive, to be sorted
1317      */
insertionSort(long[] a, int low, int high)1318     private static void insertionSort(long[] a, int low, int high) {
1319         for (int i, k = low; ++k < high; ) {
1320             long ai = a[i = k];
1321 
1322             if (ai < a[i - 1]) {
1323                 while (--i >= low && ai < a[i]) {
1324                     a[i + 1] = a[i];
1325                 }
1326                 a[i + 1] = ai;
1327             }
1328         }
1329     }
1330 
1331     /**
1332      * Sorts the specified range of the array using heap sort.
1333      *
1334      * @param a the array to be sorted
1335      * @param low the index of the first element, inclusive, to be sorted
1336      * @param high the index of the last element, exclusive, to be sorted
1337      */
heapSort(long[] a, int low, int high)1338     private static void heapSort(long[] a, int low, int high) {
1339         for (int k = (low + high) >>> 1; k > low; ) {
1340             pushDown(a, --k, a[k], low, high);
1341         }
1342         while (--high > low) {
1343             long max = a[low];
1344             pushDown(a, low, a[high], low, high);
1345             a[high] = max;
1346         }
1347     }
1348 
1349     /**
1350      * Pushes specified element down during heap sort.
1351      *
1352      * @param a the given array
1353      * @param p the start index
1354      * @param value the given element
1355      * @param low the index of the first element, inclusive, to be sorted
1356      * @param high the index of the last element, exclusive, to be sorted
1357      */
pushDown(long[] a, int p, long value, int low, int high)1358     private static void pushDown(long[] a, int p, long value, int low, int high) {
1359         for (int k ;; a[p] = a[p = k]) {
1360             k = (p << 1) - low + 2; // Index of the right child
1361 
1362             if (k > high) {
1363                 break;
1364             }
1365             if (k == high || a[k] < a[k - 1]) {
1366                 --k;
1367             }
1368             if (a[k] <= value) {
1369                 break;
1370             }
1371         }
1372         a[p] = value;
1373     }
1374 
1375     /**
1376      * Tries to sort the specified range of the array.
1377      *
1378      * @param sorter parallel context
1379      * @param a the array to be sorted
1380      * @param low the index of the first element to be sorted
1381      * @param size the array size
1382      * @return true if finally sorted, false otherwise
1383      */
tryMergeRuns(Sorter sorter, long[] a, int low, int size)1384     private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) {
1385 
1386         /*
1387          * The run array is constructed only if initial runs are
1388          * long enough to continue, run[i] then holds start index
1389          * of the i-th sequence of elements in non-descending order.
1390          */
1391         int[] run = null;
1392         int high = low + size;
1393         int count = 1, last = low;
1394 
1395         /*
1396          * Identify all possible runs.
1397          */
1398         for (int k = low + 1; k < high; ) {
1399 
1400             /*
1401              * Find the end index of the current run.
1402              */
1403             if (a[k - 1] < a[k]) {
1404 
1405                 // Identify ascending sequence
1406                 while (++k < high && a[k - 1] <= a[k]);
1407 
1408             } else if (a[k - 1] > a[k]) {
1409 
1410                 // Identify descending sequence
1411                 while (++k < high && a[k - 1] >= a[k]);
1412 
1413                 // Reverse into ascending order
1414                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
1415                     long ai = a[i]; a[i] = a[j]; a[j] = ai;
1416                 }
1417             } else { // Identify constant sequence
1418                 for (long ak = a[k]; ++k < high && ak == a[k]; );
1419 
1420                 if (k < high) {
1421                     continue;
1422                 }
1423             }
1424 
1425             /*
1426              * Check special cases.
1427              */
1428             if (run == null) {
1429                 if (k == high) {
1430 
1431                     /*
1432                      * The array is monotonous sequence,
1433                      * and therefore already sorted.
1434                      */
1435                     return true;
1436                 }
1437 
1438                 if (k - low < MIN_FIRST_RUN_SIZE) {
1439 
1440                     /*
1441                      * The first run is too small
1442                      * to proceed with scanning.
1443                      */
1444                     return false;
1445                 }
1446 
1447                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
1448                 run[0] = low;
1449 
1450             } else if (a[last - 1] > a[last]) {
1451 
1452                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
1453 
1454                     /*
1455                      * The first runs are not long
1456                      * enough to continue scanning.
1457                      */
1458                     return false;
1459                 }
1460 
1461                 if (++count == MAX_RUN_CAPACITY) {
1462 
1463                     /*
1464                      * Array is not highly structured.
1465                      */
1466                     return false;
1467                 }
1468 
1469                 if (count == run.length) {
1470 
1471                     /*
1472                      * Increase capacity of index array.
1473                      */
1474                     run = Arrays.copyOf(run, count << 1);
1475                 }
1476             }
1477             run[count] = (last = k);
1478         }
1479 
1480         /*
1481          * Merge runs of highly structured array.
1482          */
1483         if (count > 1) {
1484             long[] b; int offset = low;
1485 
1486             if (sorter == null || (b = (long[]) sorter.b) == null) {
1487                 b = new long[size];
1488             } else {
1489                 offset = sorter.offset;
1490             }
1491             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
1492         }
1493         return true;
1494     }
1495 
1496     /**
1497      * Merges the specified runs.
1498      *
1499      * @param a the source array
1500      * @param b the temporary buffer used in merging
1501      * @param offset the start index in the source, inclusive
1502      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
1503      * @param parallel indicates whether merging is performed in parallel
1504      * @param run the start indexes of the runs, inclusive
1505      * @param lo the start index of the first run, inclusive
1506      * @param hi the start index of the last run, inclusive
1507      * @return the destination where runs are merged
1508      */
mergeRuns(long[] a, long[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)1509     private static long[] mergeRuns(long[] a, long[] b, int offset,
1510             int aim, boolean parallel, int[] run, int lo, int hi) {
1511 
1512         if (hi - lo == 1) {
1513             if (aim >= 0) {
1514                 return a;
1515             }
1516             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
1517                 b[--j] = a[--i]
1518             );
1519             return b;
1520         }
1521 
1522         /*
1523          * Split into approximately equal parts.
1524          */
1525         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
1526         while (run[++mi + 1] <= rmi);
1527 
1528         /*
1529          * Merge the left and right parts.
1530          */
1531         long[] a1, a2;
1532 
1533         if (parallel && hi - lo > MIN_RUN_COUNT) {
1534             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
1535             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
1536             a2 = (long[]) merger.getDestination();
1537         } else {
1538             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
1539             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
1540         }
1541 
1542         long[] dst = a1 == a ? b : a;
1543 
1544         int k   = a1 == a ? run[lo] - offset : run[lo];
1545         int lo1 = a1 == b ? run[lo] - offset : run[lo];
1546         int hi1 = a1 == b ? run[mi] - offset : run[mi];
1547         int lo2 = a2 == b ? run[mi] - offset : run[mi];
1548         int hi2 = a2 == b ? run[hi] - offset : run[hi];
1549 
1550         if (parallel) {
1551             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
1552         } else {
1553             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
1554         }
1555         return dst;
1556     }
1557 
1558     /**
1559      * Merges the sorted parts.
1560      *
1561      * @param merger parallel context
1562      * @param dst the destination where parts are merged
1563      * @param k the start index of the destination, inclusive
1564      * @param a1 the first part
1565      * @param lo1 the start index of the first part, inclusive
1566      * @param hi1 the end index of the first part, exclusive
1567      * @param a2 the second part
1568      * @param lo2 the start index of the second part, inclusive
1569      * @param hi2 the end index of the second part, exclusive
1570      */
mergeParts(Merger merger, long[] dst, int k, long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2)1571     private static void mergeParts(Merger merger, long[] dst, int k,
1572             long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) {
1573 
1574         if (merger != null && a1 == a2) {
1575 
1576             while (true) {
1577 
1578                 /*
1579                  * The first part must be larger.
1580                  */
1581                 if (hi1 - lo1 < hi2 - lo2) {
1582                     int lo = lo1; lo1 = lo2; lo2 = lo;
1583                     int hi = hi1; hi1 = hi2; hi2 = hi;
1584                 }
1585 
1586                 /*
1587                  * Small parts will be merged sequentially.
1588                  */
1589                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
1590                     break;
1591                 }
1592 
1593                 /*
1594                  * Find the median of the larger part.
1595                  */
1596                 int mi1 = (lo1 + hi1) >>> 1;
1597                 long key = a1[mi1];
1598                 int mi2 = hi2;
1599 
1600                 /*
1601                  * Partition the smaller part.
1602                  */
1603                 for (int loo = lo2; loo < mi2; ) {
1604                     int t = (loo + mi2) >>> 1;
1605 
1606                     if (key > a2[t]) {
1607                         loo = t + 1;
1608                     } else {
1609                         mi2 = t;
1610                     }
1611                 }
1612 
1613                 int d = mi2 - lo2 + mi1 - lo1;
1614 
1615                 /*
1616                  * Merge the right sub-parts in parallel.
1617                  */
1618                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
1619 
1620                 /*
1621                  * Process the sub-left parts.
1622                  */
1623                 hi1 = mi1;
1624                 hi2 = mi2;
1625             }
1626         }
1627 
1628         /*
1629          * Merge small parts sequentially.
1630          */
1631         while (lo1 < hi1 && lo2 < hi2) {
1632             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
1633         }
1634         if (dst != a1 || k < lo1) {
1635             while (lo1 < hi1) {
1636                 dst[k++] = a1[lo1++];
1637             }
1638         }
1639         if (dst != a2 || k < lo2) {
1640             while (lo2 < hi2) {
1641                 dst[k++] = a2[lo2++];
1642             }
1643         }
1644     }
1645 
1646 // [byte]
1647 
1648     /**
1649      * Sorts the specified range of the array using
1650      * counting sort or insertion sort.
1651      *
1652      * @param a the array to be sorted
1653      * @param low the index of the first element, inclusive, to be sorted
1654      * @param high the index of the last element, exclusive, to be sorted
1655      */
1656     static void sort(byte[] a, int low, int high) {
1657         if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) {
1658             countingSort(a, low, high);
1659         } else {
1660             insertionSort(a, low, high);
1661         }
1662     }
1663 
1664     /**
1665      * Sorts the specified range of the array using insertion sort.
1666      *
1667      * @param a the array to be sorted
1668      * @param low the index of the first element, inclusive, to be sorted
1669      * @param high the index of the last element, exclusive, to be sorted
1670      */
1671     private static void insertionSort(byte[] a, int low, int high) {
1672         for (int i, k = low; ++k < high; ) {
1673             byte ai = a[i = k];
1674 
1675             if (ai < a[i - 1]) {
1676                 while (--i >= low && ai < a[i]) {
1677                     a[i + 1] = a[i];
1678                 }
1679                 a[i + 1] = ai;
1680             }
1681         }
1682     }
1683 
1684     /**
1685      * The number of distinct byte values.
1686      */
1687     private static final int NUM_BYTE_VALUES = 1 << 8;
1688 
1689     /**
1690      * Max index of byte counter.
1691      */
1692     private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1;
1693 
1694     /**
1695      * Sorts the specified range of the array using counting sort.
1696      *
1697      * @param a the array to be sorted
1698      * @param low the index of the first element, inclusive, to be sorted
1699      * @param high the index of the last element, exclusive, to be sorted
1700      */
1701     private static void countingSort(byte[] a, int low, int high) {
1702         int[] count = new int[NUM_BYTE_VALUES];
1703 
1704         /*
1705          * Compute a histogram with the number of each values.
1706          */
1707         for (int i = high; i > low; ++count[a[--i] & 0xFF]);
1708 
1709         /*
1710          * Place values on their final positions.
1711          */
1712         if (high - low > NUM_BYTE_VALUES) {
1713             for (int i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) {
1714                 int value = i & 0xFF;
1715 
1716                 for (low = high - count[value]; high > low;
1717                     a[--high] = (byte) value
1718                 );
1719             }
1720         } else {
1721             for (int i = MAX_BYTE_INDEX; high > low; ) {
1722                 while (count[--i & 0xFF] == 0);
1723 
1724                 int value = i & 0xFF;
1725                 int c = count[value];
1726 
1727                 do {
1728                     a[--high] = (byte) value;
1729                 } while (--c > 0);
1730             }
1731         }
1732     }
1733 
1734 // [char]
1735 
1736     /**
1737      * Sorts the specified range of the array using
1738      * counting sort or Dual-Pivot Quicksort.
1739      *
1740      * @param a the array to be sorted
1741      * @param low the index of the first element, inclusive, to be sorted
1742      * @param high the index of the last element, exclusive, to be sorted
1743      */
1744     static void sort(char[] a, int low, int high) {
1745         if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) {
1746             countingSort(a, low, high);
1747         } else {
1748             sort(a, 0, low, high);
1749         }
1750     }
1751 
1752     /**
1753      * Sorts the specified array using the Dual-Pivot Quicksort and/or
1754      * other sorts in special-cases, possibly with parallel partitions.
1755      *
1756      * @param a the array to be sorted
1757      * @param bits the combination of recursion depth and bit flag, where
1758      *        the right bit "0" indicates that array is the leftmost part
1759      * @param low the index of the first element, inclusive, to be sorted
1760      * @param high the index of the last element, exclusive, to be sorted
1761      */
1762     static void sort(char[] a, int bits, int low, int high) {
1763         while (true) {
1764             int end = high - 1, size = high - low;
1765 
1766             /*
1767              * Invoke insertion sort on small leftmost part.
1768              */
1769             if (size < MAX_INSERTION_SORT_SIZE) {
1770                 insertionSort(a, low, high);
1771                 return;
1772             }
1773 
1774             /*
1775              * Switch to counting sort if execution
1776              * time is becoming quadratic.
1777              */
1778             if ((bits += DELTA) > MAX_RECURSION_DEPTH) {
1779                 countingSort(a, low, high);
1780                 return;
1781             }
1782 
1783             /*
1784              * Use an inexpensive approximation of the golden ratio
1785              * to select five sample elements and determine pivots.
1786              */
1787             int step = (size >> 3) * 3 + 3;
1788 
1789             /*
1790              * Five elements around (and including) the central element
1791              * will be used for pivot selection as described below. The
1792              * unequal choice of spacing these elements was empirically
1793              * determined to work well on a wide variety of inputs.
1794              */
1795             int e1 = low + step;
1796             int e5 = end - step;
1797             int e3 = (e1 + e5) >>> 1;
1798             int e2 = (e1 + e3) >>> 1;
1799             int e4 = (e3 + e5) >>> 1;
1800             char a3 = a[e3];
1801 
1802             /*
1803              * Sort these elements in place by the combination
1804              * of 4-element sorting network and insertion sort.
1805              *
1806              *    5 ------o-----------o------------
1807              *            |           |
1808              *    4 ------|-----o-----o-----o------
1809              *            |     |           |
1810              *    2 ------o-----|-----o-----o------
1811              *                  |     |
1812              *    1 ------------o-----o------------
1813              */
1814             if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
1815             if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
1816             if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
1817             if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
1818             if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
1819 
1820             if (a3 < a[e2]) {
1821                 if (a3 < a[e1]) {
1822                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
1823                 } else {
1824                     a[e3] = a[e2]; a[e2] = a3;
1825                 }
1826             } else if (a3 > a[e4]) {
1827                 if (a3 > a[e5]) {
1828                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
1829                 } else {
1830                     a[e3] = a[e4]; a[e4] = a3;
1831                 }
1832             }
1833 
1834             // Pointers
1835             int lower = low; // The index of the last element of the left part
1836             int upper = end; // The index of the first element of the right part
1837 
1838             /*
1839              * Partitioning with 2 pivots in case of different elements.
1840              */
1841             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
1842 
1843                 /*
1844                  * Use the first and fifth of the five sorted elements as
1845                  * the pivots. These values are inexpensive approximation
1846                  * of tertiles. Note, that pivot1 < pivot2.
1847                  */
1848                 char pivot1 = a[e1];
1849                 char pivot2 = a[e5];
1850 
1851                 /*
1852                  * The first and the last elements to be sorted are moved
1853                  * to the locations formerly occupied by the pivots. When
1854                  * partitioning is completed, the pivots are swapped back
1855                  * into their final positions, and excluded from the next
1856                  * subsequent sorting.
1857                  */
1858                 a[e1] = a[lower];
1859                 a[e5] = a[upper];
1860 
1861                 /*
1862                  * Skip elements, which are less or greater than the pivots.
1863                  */
1864                 while (a[++lower] < pivot1);
1865                 while (a[--upper] > pivot2);
1866 
1867                 /*
1868                  * Backward 3-interval partitioning
1869                  *
1870                  *   left part                 central part          right part
1871                  * +------------------------------------------------------------+
1872                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
1873                  * +------------------------------------------------------------+
1874                  *             ^       ^                            ^
1875                  *             |       |                            |
1876                  *           lower     k                          upper
1877                  *
1878                  * Invariants:
1879                  *
1880                  *              all in (low, lower] < pivot1
1881                  *    pivot1 <= all in (k, upper)  <= pivot2
1882                  *              all in [upper, end) > pivot2
1883                  *
1884                  * Pointer k is the last index of ?-part
1885                  */
1886                 for (int unused = --lower, k = ++upper; --k > lower; ) {
1887                     char ak = a[k];
1888 
1889                     if (ak < pivot1) { // Move a[k] to the left side
1890                         while (lower < k) {
1891                             if (a[++lower] >= pivot1) {
1892                                 if (a[lower] > pivot2) {
1893                                     a[k] = a[--upper];
1894                                     a[upper] = a[lower];
1895                                 } else {
1896                                     a[k] = a[lower];
1897                                 }
1898                                 a[lower] = ak;
1899                                 break;
1900                             }
1901                         }
1902                     } else if (ak > pivot2) { // Move a[k] to the right side
1903                         a[k] = a[--upper];
1904                         a[upper] = ak;
1905                     }
1906                 }
1907 
1908                 /*
1909                  * Swap the pivots into their final positions.
1910                  */
1911                 a[low] = a[lower]; a[lower] = pivot1;
1912                 a[end] = a[upper]; a[upper] = pivot2;
1913 
1914                 /*
1915                  * Sort non-left parts recursively,
1916                  * excluding known pivots.
1917                  */
1918                 sort(a, bits | 1, lower + 1, upper);
1919                 sort(a, bits | 1, upper + 1, high);
1920 
1921             } else { // Use single pivot in case of many equal elements
1922 
1923                 /*
1924                  * Use the third of the five sorted elements as the pivot.
1925                  * This value is inexpensive approximation of the median.
1926                  */
1927                 char pivot = a[e3];
1928 
1929                 /*
1930                  * The first element to be sorted is moved to the
1931                  * location formerly occupied by the pivot. After
1932                  * completion of partitioning the pivot is swapped
1933                  * back into its final position, and excluded from
1934                  * the next subsequent sorting.
1935                  */
1936                 a[e3] = a[lower];
1937 
1938                 /*
1939                  * Traditional 3-way (Dutch National Flag) partitioning
1940                  *
1941                  *   left part                 central part    right part
1942                  * +------------------------------------------------------+
1943                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
1944                  * +------------------------------------------------------+
1945                  *              ^           ^                ^
1946                  *              |           |                |
1947                  *            lower         k              upper
1948                  *
1949                  * Invariants:
1950                  *
1951                  *   all in (low, lower] < pivot
1952                  *   all in (k, upper)  == pivot
1953                  *   all in [upper, end] > pivot
1954                  *
1955                  * Pointer k is the last index of ?-part
1956                  */
1957                 for (int k = ++upper; --k > lower; ) {
1958                     char ak = a[k];
1959 
1960                     if (ak != pivot) {
1961                         a[k] = pivot;
1962 
1963                         if (ak < pivot) { // Move a[k] to the left side
1964                             while (a[++lower] < pivot);
1965 
1966                             if (a[lower] > pivot) {
1967                                 a[--upper] = a[lower];
1968                             }
1969                             a[lower] = ak;
1970                         } else { // ak > pivot - Move a[k] to the right side
1971                             a[--upper] = ak;
1972                         }
1973                     }
1974                 }
1975 
1976                 /*
1977                  * Swap the pivot into its final position.
1978                  */
1979                 a[low] = a[lower]; a[lower] = pivot;
1980 
1981                 /*
1982                  * Sort the right part, excluding known pivot.
1983                  * All elements from the central part are
1984                  * equal and therefore already sorted.
1985                  */
1986                 sort(a, bits | 1, upper, high);
1987             }
1988             high = lower; // Iterate along the left part
1989         }
1990     }
1991 
1992     /**
1993      * Sorts the specified range of the array using insertion sort.
1994      *
1995      * @param a the array to be sorted
1996      * @param low the index of the first element, inclusive, to be sorted
1997      * @param high the index of the last element, exclusive, to be sorted
1998      */
insertionSort(char[] a, int low, int high)1999     private static void insertionSort(char[] a, int low, int high) {
2000         for (int i, k = low; ++k < high; ) {
2001             char ai = a[i = k];
2002 
2003             if (ai < a[i - 1]) {
2004                 while (--i >= low && ai < a[i]) {
2005                     a[i + 1] = a[i];
2006                 }
2007                 a[i + 1] = ai;
2008             }
2009         }
2010     }
2011 
2012     /**
2013      * The number of distinct char values.
2014      */
2015     private static final int NUM_CHAR_VALUES = 1 << 16;
2016 
2017     /**
2018      * Sorts the specified range of the array using counting sort.
2019      *
2020      * @param a the array to be sorted
2021      * @param low the index of the first element, inclusive, to be sorted
2022      * @param high the index of the last element, exclusive, to be sorted
2023      */
countingSort(char[] a, int low, int high)2024     private static void countingSort(char[] a, int low, int high) {
2025         int[] count = new int[NUM_CHAR_VALUES];
2026 
2027         /*
2028          * Compute a histogram with the number of each values.
2029          */
2030         for (int i = high; i > low; ++count[a[--i]]);
2031 
2032         /*
2033          * Place values on their final positions.
2034          */
2035         if (high - low > NUM_CHAR_VALUES) {
2036             for (int i = NUM_CHAR_VALUES; i > 0; ) {
2037                 for (low = high - count[--i]; high > low;
2038                     a[--high] = (char) i
2039                 );
2040             }
2041         } else {
2042             for (int i = NUM_CHAR_VALUES; high > low; ) {
2043                 while (count[--i] == 0);
2044                 int c = count[i];
2045 
2046                 do {
2047                     a[--high] = (char) i;
2048                 } while (--c > 0);
2049             }
2050         }
2051     }
2052 
2053 // [short]
2054 
2055     /**
2056      * Sorts the specified range of the array using
2057      * counting sort or Dual-Pivot Quicksort.
2058      *
2059      * @param a the array to be sorted
2060      * @param low the index of the first element, inclusive, to be sorted
2061      * @param high the index of the last element, exclusive, to be sorted
2062      */
sort(short[] a, int low, int high)2063     static void sort(short[] a, int low, int high) {
2064         if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) {
2065             countingSort(a, low, high);
2066         } else {
2067             sort(a, 0, low, high);
2068         }
2069     }
2070 
2071     /**
2072      * Sorts the specified array using the Dual-Pivot Quicksort and/or
2073      * other sorts in special-cases, possibly with parallel partitions.
2074      *
2075      * @param a the array to be sorted
2076      * @param bits the combination of recursion depth and bit flag, where
2077      *        the right bit "0" indicates that array is the leftmost part
2078      * @param low the index of the first element, inclusive, to be sorted
2079      * @param high the index of the last element, exclusive, to be sorted
2080      */
sort(short[] a, int bits, int low, int high)2081     static void sort(short[] a, int bits, int low, int high) {
2082         while (true) {
2083             int end = high - 1, size = high - low;
2084 
2085             /*
2086              * Invoke insertion sort on small leftmost part.
2087              */
2088             if (size < MAX_INSERTION_SORT_SIZE) {
2089                 insertionSort(a, low, high);
2090                 return;
2091             }
2092 
2093             /*
2094              * Switch to counting sort if execution
2095              * time is becoming quadratic.
2096              */
2097             if ((bits += DELTA) > MAX_RECURSION_DEPTH) {
2098                 countingSort(a, low, high);
2099                 return;
2100             }
2101 
2102             /*
2103              * Use an inexpensive approximation of the golden ratio
2104              * to select five sample elements and determine pivots.
2105              */
2106             int step = (size >> 3) * 3 + 3;
2107 
2108             /*
2109              * Five elements around (and including) the central element
2110              * will be used for pivot selection as described below. The
2111              * unequal choice of spacing these elements was empirically
2112              * determined to work well on a wide variety of inputs.
2113              */
2114             int e1 = low + step;
2115             int e5 = end - step;
2116             int e3 = (e1 + e5) >>> 1;
2117             int e2 = (e1 + e3) >>> 1;
2118             int e4 = (e3 + e5) >>> 1;
2119             short a3 = a[e3];
2120 
2121             /*
2122              * Sort these elements in place by the combination
2123              * of 4-element sorting network and insertion sort.
2124              *
2125              *    5 ------o-----------o------------
2126              *            |           |
2127              *    4 ------|-----o-----o-----o------
2128              *            |     |           |
2129              *    2 ------o-----|-----o-----o------
2130              *                  |     |
2131              *    1 ------------o-----o------------
2132              */
2133             if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
2134             if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
2135             if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
2136             if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
2137             if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
2138 
2139             if (a3 < a[e2]) {
2140                 if (a3 < a[e1]) {
2141                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
2142                 } else {
2143                     a[e3] = a[e2]; a[e2] = a3;
2144                 }
2145             } else if (a3 > a[e4]) {
2146                 if (a3 > a[e5]) {
2147                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
2148                 } else {
2149                     a[e3] = a[e4]; a[e4] = a3;
2150                 }
2151             }
2152 
2153             // Pointers
2154             int lower = low; // The index of the last element of the left part
2155             int upper = end; // The index of the first element of the right part
2156 
2157             /*
2158              * Partitioning with 2 pivots in case of different elements.
2159              */
2160             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
2161 
2162                 /*
2163                  * Use the first and fifth of the five sorted elements as
2164                  * the pivots. These values are inexpensive approximation
2165                  * of tertiles. Note, that pivot1 < pivot2.
2166                  */
2167                 short pivot1 = a[e1];
2168                 short pivot2 = a[e5];
2169 
2170                 /*
2171                  * The first and the last elements to be sorted are moved
2172                  * to the locations formerly occupied by the pivots. When
2173                  * partitioning is completed, the pivots are swapped back
2174                  * into their final positions, and excluded from the next
2175                  * subsequent sorting.
2176                  */
2177                 a[e1] = a[lower];
2178                 a[e5] = a[upper];
2179 
2180                 /*
2181                  * Skip elements, which are less or greater than the pivots.
2182                  */
2183                 while (a[++lower] < pivot1);
2184                 while (a[--upper] > pivot2);
2185 
2186                 /*
2187                  * Backward 3-interval partitioning
2188                  *
2189                  *   left part                 central part          right part
2190                  * +------------------------------------------------------------+
2191                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
2192                  * +------------------------------------------------------------+
2193                  *             ^       ^                            ^
2194                  *             |       |                            |
2195                  *           lower     k                          upper
2196                  *
2197                  * Invariants:
2198                  *
2199                  *              all in (low, lower] < pivot1
2200                  *    pivot1 <= all in (k, upper)  <= pivot2
2201                  *              all in [upper, end) > pivot2
2202                  *
2203                  * Pointer k is the last index of ?-part
2204                  */
2205                 for (int unused = --lower, k = ++upper; --k > lower; ) {
2206                     short ak = a[k];
2207 
2208                     if (ak < pivot1) { // Move a[k] to the left side
2209                         while (lower < k) {
2210                             if (a[++lower] >= pivot1) {
2211                                 if (a[lower] > pivot2) {
2212                                     a[k] = a[--upper];
2213                                     a[upper] = a[lower];
2214                                 } else {
2215                                     a[k] = a[lower];
2216                                 }
2217                                 a[lower] = ak;
2218                                 break;
2219                             }
2220                         }
2221                     } else if (ak > pivot2) { // Move a[k] to the right side
2222                         a[k] = a[--upper];
2223                         a[upper] = ak;
2224                     }
2225                 }
2226 
2227                 /*
2228                  * Swap the pivots into their final positions.
2229                  */
2230                 a[low] = a[lower]; a[lower] = pivot1;
2231                 a[end] = a[upper]; a[upper] = pivot2;
2232 
2233                 /*
2234                  * Sort non-left parts recursively,
2235                  * excluding known pivots.
2236                  */
2237                 sort(a, bits | 1, lower + 1, upper);
2238                 sort(a, bits | 1, upper + 1, high);
2239 
2240             } else { // Use single pivot in case of many equal elements
2241 
2242                 /*
2243                  * Use the third of the five sorted elements as the pivot.
2244                  * This value is inexpensive approximation of the median.
2245                  */
2246                 short pivot = a[e3];
2247 
2248                 /*
2249                  * The first element to be sorted is moved to the
2250                  * location formerly occupied by the pivot. After
2251                  * completion of partitioning the pivot is swapped
2252                  * back into its final position, and excluded from
2253                  * the next subsequent sorting.
2254                  */
2255                 a[e3] = a[lower];
2256 
2257                 /*
2258                  * Traditional 3-way (Dutch National Flag) partitioning
2259                  *
2260                  *   left part                 central part    right part
2261                  * +------------------------------------------------------+
2262                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
2263                  * +------------------------------------------------------+
2264                  *              ^           ^                ^
2265                  *              |           |                |
2266                  *            lower         k              upper
2267                  *
2268                  * Invariants:
2269                  *
2270                  *   all in (low, lower] < pivot
2271                  *   all in (k, upper)  == pivot
2272                  *   all in [upper, end] > pivot
2273                  *
2274                  * Pointer k is the last index of ?-part
2275                  */
2276                 for (int k = ++upper; --k > lower; ) {
2277                     short ak = a[k];
2278 
2279                     if (ak != pivot) {
2280                         a[k] = pivot;
2281 
2282                         if (ak < pivot) { // Move a[k] to the left side
2283                             while (a[++lower] < pivot);
2284 
2285                             if (a[lower] > pivot) {
2286                                 a[--upper] = a[lower];
2287                             }
2288                             a[lower] = ak;
2289                         } else { // ak > pivot - Move a[k] to the right side
2290                             a[--upper] = ak;
2291                         }
2292                     }
2293                 }
2294 
2295                 /*
2296                  * Swap the pivot into its final position.
2297                  */
2298                 a[low] = a[lower]; a[lower] = pivot;
2299 
2300                 /*
2301                  * Sort the right part, excluding known pivot.
2302                  * All elements from the central part are
2303                  * equal and therefore already sorted.
2304                  */
2305                 sort(a, bits | 1, upper, high);
2306             }
2307             high = lower; // Iterate along the left part
2308         }
2309     }
2310 
2311     /**
2312      * Sorts the specified range of the array using insertion sort.
2313      *
2314      * @param a the array to be sorted
2315      * @param low the index of the first element, inclusive, to be sorted
2316      * @param high the index of the last element, exclusive, to be sorted
2317      */
insertionSort(short[] a, int low, int high)2318     private static void insertionSort(short[] a, int low, int high) {
2319         for (int i, k = low; ++k < high; ) {
2320             short ai = a[i = k];
2321 
2322             if (ai < a[i - 1]) {
2323                 while (--i >= low && ai < a[i]) {
2324                     a[i + 1] = a[i];
2325                 }
2326                 a[i + 1] = ai;
2327             }
2328         }
2329     }
2330 
2331     /**
2332      * The number of distinct short values.
2333      */
2334     private static final int NUM_SHORT_VALUES = 1 << 16;
2335 
2336     /**
2337      * Max index of short counter.
2338      */
2339     private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1;
2340 
2341     /**
2342      * Sorts the specified range of the array using counting sort.
2343      *
2344      * @param a the array to be sorted
2345      * @param low the index of the first element, inclusive, to be sorted
2346      * @param high the index of the last element, exclusive, to be sorted
2347      */
countingSort(short[] a, int low, int high)2348     private static void countingSort(short[] a, int low, int high) {
2349         int[] count = new int[NUM_SHORT_VALUES];
2350 
2351         /*
2352          * Compute a histogram with the number of each values.
2353          */
2354         for (int i = high; i > low; ++count[a[--i] & 0xFFFF]);
2355 
2356         /*
2357          * Place values on their final positions.
2358          */
2359         if (high - low > NUM_SHORT_VALUES) {
2360             for (int i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) {
2361                 int value = i & 0xFFFF;
2362 
2363                 for (low = high - count[value]; high > low;
2364                     a[--high] = (short) value
2365                 );
2366             }
2367         } else {
2368             for (int i = MAX_SHORT_INDEX; high > low; ) {
2369                 while (count[--i & 0xFFFF] == 0);
2370 
2371                 int value = i & 0xFFFF;
2372                 int c = count[value];
2373 
2374                 do {
2375                     a[--high] = (short) value;
2376                 } while (--c > 0);
2377             }
2378         }
2379     }
2380 
2381 // [float]
2382 
2383     /**
2384      * Sorts the specified range of the array using parallel merge
2385      * sort and/or Dual-Pivot Quicksort.
2386      *
2387      * To balance the faster splitting and parallelism of merge sort
2388      * with the faster element partitioning of Quicksort, ranges are
2389      * subdivided in tiers such that, if there is enough parallelism,
2390      * the four-way parallel merge is started, still ensuring enough
2391      * parallelism to process the partitions.
2392      *
2393      * @param a the array to be sorted
2394      * @param parallelism the parallelism level
2395      * @param low the index of the first element, inclusive, to be sorted
2396      * @param high the index of the last element, exclusive, to be sorted
2397      */
sort(float[] a, int parallelism, int low, int high)2398     static void sort(float[] a, int parallelism, int low, int high) {
2399         /*
2400          * Phase 1. Count the number of negative zero -0.0f,
2401          * turn them into positive zero, and move all NaNs
2402          * to the end of the array.
2403          */
2404         int numNegativeZero = 0;
2405 
2406         for (int k = high; k > low; ) {
2407             float ak = a[--k];
2408 
2409             if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f
2410                 numNegativeZero += 1;
2411                 a[k] = 0.0f;
2412             } else if (ak != ak) { // ak is NaN
2413                 a[k] = a[--high];
2414                 a[high] = ak;
2415             }
2416         }
2417 
2418         /*
2419          * Phase 2. Sort everything except NaNs,
2420          * which are already in place.
2421          */
2422         int size = high - low;
2423 
2424         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
2425             int depth = getDepth(parallelism, size >> 12);
2426             float[] b = depth == 0 ? null : new float[size];
2427             new Sorter(null, a, b, low, size, low, depth).invoke();
2428         } else {
2429             sort(null, a, 0, low, high);
2430         }
2431 
2432         /*
2433          * Phase 3. Turn positive zero 0.0f
2434          * back into negative zero -0.0f.
2435          */
2436         if (++numNegativeZero == 1) {
2437             return;
2438         }
2439 
2440         /*
2441          * Find the position one less than
2442          * the index of the first zero.
2443          */
2444         while (low <= high) {
2445             int middle = (low + high) >>> 1;
2446 
2447             if (a[middle] < 0) {
2448                 low = middle + 1;
2449             } else {
2450                 high = middle - 1;
2451             }
2452         }
2453 
2454         /*
2455          * Replace the required number of 0.0f by -0.0f.
2456          */
2457         while (--numNegativeZero > 0) {
2458             a[++high] = -0.0f;
2459         }
2460     }
2461 
2462     /**
2463      * Sorts the specified array using the Dual-Pivot Quicksort and/or
2464      * other sorts in special-cases, possibly with parallel partitions.
2465      *
2466      * @param sorter parallel context
2467      * @param a the array to be sorted
2468      * @param bits the combination of recursion depth and bit flag, where
2469      *        the right bit "0" indicates that array is the leftmost part
2470      * @param low the index of the first element, inclusive, to be sorted
2471      * @param high the index of the last element, exclusive, to be sorted
2472      */
sort(Sorter sorter, float[] a, int bits, int low, int high)2473     static void sort(Sorter sorter, float[] a, int bits, int low, int high) {
2474         while (true) {
2475             int end = high - 1, size = high - low;
2476 
2477             /*
2478              * Run mixed insertion sort on small non-leftmost parts.
2479              */
2480             if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) {
2481                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
2482                 return;
2483             }
2484 
2485             /*
2486              * Invoke insertion sort on small leftmost part.
2487              */
2488             if (size < MAX_INSERTION_SORT_SIZE) {
2489                 insertionSort(a, low, high);
2490                 return;
2491             }
2492 
2493             /*
2494              * Check if the whole array or large non-leftmost
2495              * parts are nearly sorted and then merge runs.
2496              */
2497             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
2498                     && tryMergeRuns(sorter, a, low, size)) {
2499                 return;
2500             }
2501 
2502             /*
2503              * Switch to heap sort if execution
2504              * time is becoming quadratic.
2505              */
2506             if ((bits += DELTA) > MAX_RECURSION_DEPTH) {
2507                 heapSort(a, low, high);
2508                 return;
2509             }
2510 
2511             /*
2512              * Use an inexpensive approximation of the golden ratio
2513              * to select five sample elements and determine pivots.
2514              */
2515             int step = (size >> 3) * 3 + 3;
2516 
2517             /*
2518              * Five elements around (and including) the central element
2519              * will be used for pivot selection as described below. The
2520              * unequal choice of spacing these elements was empirically
2521              * determined to work well on a wide variety of inputs.
2522              */
2523             int e1 = low + step;
2524             int e5 = end - step;
2525             int e3 = (e1 + e5) >>> 1;
2526             int e2 = (e1 + e3) >>> 1;
2527             int e4 = (e3 + e5) >>> 1;
2528             float a3 = a[e3];
2529 
2530             /*
2531              * Sort these elements in place by the combination
2532              * of 4-element sorting network and insertion sort.
2533              *
2534              *    5 ------o-----------o------------
2535              *            |           |
2536              *    4 ------|-----o-----o-----o------
2537              *            |     |           |
2538              *    2 ------o-----|-----o-----o------
2539              *                  |     |
2540              *    1 ------------o-----o------------
2541              */
2542             if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
2543             if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
2544             if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
2545             if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
2546             if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
2547 
2548             if (a3 < a[e2]) {
2549                 if (a3 < a[e1]) {
2550                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
2551                 } else {
2552                     a[e3] = a[e2]; a[e2] = a3;
2553                 }
2554             } else if (a3 > a[e4]) {
2555                 if (a3 > a[e5]) {
2556                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
2557                 } else {
2558                     a[e3] = a[e4]; a[e4] = a3;
2559                 }
2560             }
2561 
2562             // Pointers
2563             int lower = low; // The index of the last element of the left part
2564             int upper = end; // The index of the first element of the right part
2565 
2566             /*
2567              * Partitioning with 2 pivots in case of different elements.
2568              */
2569             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
2570 
2571                 /*
2572                  * Use the first and fifth of the five sorted elements as
2573                  * the pivots. These values are inexpensive approximation
2574                  * of tertiles. Note, that pivot1 < pivot2.
2575                  */
2576                 float pivot1 = a[e1];
2577                 float pivot2 = a[e5];
2578 
2579                 /*
2580                  * The first and the last elements to be sorted are moved
2581                  * to the locations formerly occupied by the pivots. When
2582                  * partitioning is completed, the pivots are swapped back
2583                  * into their final positions, and excluded from the next
2584                  * subsequent sorting.
2585                  */
2586                 a[e1] = a[lower];
2587                 a[e5] = a[upper];
2588 
2589                 /*
2590                  * Skip elements, which are less or greater than the pivots.
2591                  */
2592                 while (a[++lower] < pivot1);
2593                 while (a[--upper] > pivot2);
2594 
2595                 /*
2596                  * Backward 3-interval partitioning
2597                  *
2598                  *   left part                 central part          right part
2599                  * +------------------------------------------------------------+
2600                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
2601                  * +------------------------------------------------------------+
2602                  *             ^       ^                            ^
2603                  *             |       |                            |
2604                  *           lower     k                          upper
2605                  *
2606                  * Invariants:
2607                  *
2608                  *              all in (low, lower] < pivot1
2609                  *    pivot1 <= all in (k, upper)  <= pivot2
2610                  *              all in [upper, end) > pivot2
2611                  *
2612                  * Pointer k is the last index of ?-part
2613                  */
2614                 for (int unused = --lower, k = ++upper; --k > lower; ) {
2615                     float ak = a[k];
2616 
2617                     if (ak < pivot1) { // Move a[k] to the left side
2618                         while (lower < k) {
2619                             if (a[++lower] >= pivot1) {
2620                                 if (a[lower] > pivot2) {
2621                                     a[k] = a[--upper];
2622                                     a[upper] = a[lower];
2623                                 } else {
2624                                     a[k] = a[lower];
2625                                 }
2626                                 a[lower] = ak;
2627                                 break;
2628                             }
2629                         }
2630                     } else if (ak > pivot2) { // Move a[k] to the right side
2631                         a[k] = a[--upper];
2632                         a[upper] = ak;
2633                     }
2634                 }
2635 
2636                 /*
2637                  * Swap the pivots into their final positions.
2638                  */
2639                 a[low] = a[lower]; a[lower] = pivot1;
2640                 a[end] = a[upper]; a[upper] = pivot2;
2641 
2642                 /*
2643                  * Sort non-left parts recursively (possibly in parallel),
2644                  * excluding known pivots.
2645                  */
2646                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
2647                     sorter.forkSorter(bits | 1, lower + 1, upper);
2648                     sorter.forkSorter(bits | 1, upper + 1, high);
2649                 } else {
2650                     sort(sorter, a, bits | 1, lower + 1, upper);
2651                     sort(sorter, a, bits | 1, upper + 1, high);
2652                 }
2653 
2654             } else { // Use single pivot in case of many equal elements
2655 
2656                 /*
2657                  * Use the third of the five sorted elements as the pivot.
2658                  * This value is inexpensive approximation of the median.
2659                  */
2660                 float pivot = a[e3];
2661 
2662                 /*
2663                  * The first element to be sorted is moved to the
2664                  * location formerly occupied by the pivot. After
2665                  * completion of partitioning the pivot is swapped
2666                  * back into its final position, and excluded from
2667                  * the next subsequent sorting.
2668                  */
2669                 a[e3] = a[lower];
2670 
2671                 /*
2672                  * Traditional 3-way (Dutch National Flag) partitioning
2673                  *
2674                  *   left part                 central part    right part
2675                  * +------------------------------------------------------+
2676                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
2677                  * +------------------------------------------------------+
2678                  *              ^           ^                ^
2679                  *              |           |                |
2680                  *            lower         k              upper
2681                  *
2682                  * Invariants:
2683                  *
2684                  *   all in (low, lower] < pivot
2685                  *   all in (k, upper)  == pivot
2686                  *   all in [upper, end] > pivot
2687                  *
2688                  * Pointer k is the last index of ?-part
2689                  */
2690                 for (int k = ++upper; --k > lower; ) {
2691                     float ak = a[k];
2692 
2693                     if (ak != pivot) {
2694                         a[k] = pivot;
2695 
2696                         if (ak < pivot) { // Move a[k] to the left side
2697                             while (a[++lower] < pivot);
2698 
2699                             if (a[lower] > pivot) {
2700                                 a[--upper] = a[lower];
2701                             }
2702                             a[lower] = ak;
2703                         } else { // ak > pivot - Move a[k] to the right side
2704                             a[--upper] = ak;
2705                         }
2706                     }
2707                 }
2708 
2709                 /*
2710                  * Swap the pivot into its final position.
2711                  */
2712                 a[low] = a[lower]; a[lower] = pivot;
2713 
2714                 /*
2715                  * Sort the right part (possibly in parallel), excluding
2716                  * known pivot. All elements from the central part are
2717                  * equal and therefore already sorted.
2718                  */
2719                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
2720                     sorter.forkSorter(bits | 1, upper, high);
2721                 } else {
2722                     sort(sorter, a, bits | 1, upper, high);
2723                 }
2724             }
2725             high = lower; // Iterate along the left part
2726         }
2727     }
2728 
2729     /**
2730      * Sorts the specified range of the array using mixed insertion sort.
2731      *
2732      * Mixed insertion sort is combination of simple insertion sort,
2733      * pin insertion sort and pair insertion sort.
2734      *
2735      * In the context of Dual-Pivot Quicksort, the pivot element
2736      * from the left part plays the role of sentinel, because it
2737      * is less than any elements from the given part. Therefore,
2738      * expensive check of the left range can be skipped on each
2739      * iteration unless it is the leftmost call.
2740      *
2741      * @param a the array to be sorted
2742      * @param low the index of the first element, inclusive, to be sorted
2743      * @param end the index of the last element for simple insertion sort
2744      * @param high the index of the last element, exclusive, to be sorted
2745      */
mixedInsertionSort(float[] a, int low, int end, int high)2746     private static void mixedInsertionSort(float[] a, int low, int end, int high) {
2747         if (end == high) {
2748 
2749             /*
2750              * Invoke simple insertion sort on tiny array.
2751              */
2752             for (int i; ++low < end; ) {
2753                 float ai = a[i = low];
2754 
2755                 while (ai < a[--i]) {
2756                     a[i + 1] = a[i];
2757                 }
2758                 a[i + 1] = ai;
2759             }
2760         } else {
2761 
2762             /*
2763              * Start with pin insertion sort on small part.
2764              *
2765              * Pin insertion sort is extended simple insertion sort.
2766              * The main idea of this sort is to put elements larger
2767              * than an element called pin to the end of array (the
2768              * proper area for such elements). It avoids expensive
2769              * movements of these elements through the whole array.
2770              */
2771             float pin = a[end];
2772 
2773             for (int i, p = high; ++low < end; ) {
2774                 float ai = a[i = low];
2775 
2776                 if (ai < a[i - 1]) { // Small element
2777 
2778                     /*
2779                      * Insert small element into sorted part.
2780                      */
2781                     a[i] = a[--i];
2782 
2783                     while (ai < a[--i]) {
2784                         a[i + 1] = a[i];
2785                     }
2786                     a[i + 1] = ai;
2787 
2788                 } else if (p > i && ai > pin) { // Large element
2789 
2790                     /*
2791                      * Find element smaller than pin.
2792                      */
2793                     while (a[--p] > pin);
2794 
2795                     /*
2796                      * Swap it with large element.
2797                      */
2798                     if (p > i) {
2799                         ai = a[p];
2800                         a[p] = a[i];
2801                     }
2802 
2803                     /*
2804                      * Insert small element into sorted part.
2805                      */
2806                     while (ai < a[--i]) {
2807                         a[i + 1] = a[i];
2808                     }
2809                     a[i + 1] = ai;
2810                 }
2811             }
2812 
2813             /*
2814              * Continue with pair insertion sort on remain part.
2815              */
2816             for (int i; low < high; ++low) {
2817                 float a1 = a[i = low], a2 = a[++low];
2818 
2819                 /*
2820                  * Insert two elements per iteration: at first, insert the
2821                  * larger element and then insert the smaller element, but
2822                  * from the position where the larger element was inserted.
2823                  */
2824                 if (a1 > a2) {
2825 
2826                     while (a1 < a[--i]) {
2827                         a[i + 2] = a[i];
2828                     }
2829                     a[++i + 1] = a1;
2830 
2831                     while (a2 < a[--i]) {
2832                         a[i + 1] = a[i];
2833                     }
2834                     a[i + 1] = a2;
2835 
2836                 } else if (a1 < a[i - 1]) {
2837 
2838                     while (a2 < a[--i]) {
2839                         a[i + 2] = a[i];
2840                     }
2841                     a[++i + 1] = a2;
2842 
2843                     while (a1 < a[--i]) {
2844                         a[i + 1] = a[i];
2845                     }
2846                     a[i + 1] = a1;
2847                 }
2848             }
2849         }
2850     }
2851 
2852     /**
2853      * Sorts the specified range of the array using insertion sort.
2854      *
2855      * @param a the array to be sorted
2856      * @param low the index of the first element, inclusive, to be sorted
2857      * @param high the index of the last element, exclusive, to be sorted
2858      */
insertionSort(float[] a, int low, int high)2859     private static void insertionSort(float[] a, int low, int high) {
2860         for (int i, k = low; ++k < high; ) {
2861             float ai = a[i = k];
2862 
2863             if (ai < a[i - 1]) {
2864                 while (--i >= low && ai < a[i]) {
2865                     a[i + 1] = a[i];
2866                 }
2867                 a[i + 1] = ai;
2868             }
2869         }
2870     }
2871 
2872     /**
2873      * Sorts the specified range of the array using heap sort.
2874      *
2875      * @param a the array to be sorted
2876      * @param low the index of the first element, inclusive, to be sorted
2877      * @param high the index of the last element, exclusive, to be sorted
2878      */
heapSort(float[] a, int low, int high)2879     private static void heapSort(float[] a, int low, int high) {
2880         for (int k = (low + high) >>> 1; k > low; ) {
2881             pushDown(a, --k, a[k], low, high);
2882         }
2883         while (--high > low) {
2884             float max = a[low];
2885             pushDown(a, low, a[high], low, high);
2886             a[high] = max;
2887         }
2888     }
2889 
2890     /**
2891      * Pushes specified element down during heap sort.
2892      *
2893      * @param a the given array
2894      * @param p the start index
2895      * @param value the given element
2896      * @param low the index of the first element, inclusive, to be sorted
2897      * @param high the index of the last element, exclusive, to be sorted
2898      */
pushDown(float[] a, int p, float value, int low, int high)2899     private static void pushDown(float[] a, int p, float value, int low, int high) {
2900         for (int k ;; a[p] = a[p = k]) {
2901             k = (p << 1) - low + 2; // Index of the right child
2902 
2903             if (k > high) {
2904                 break;
2905             }
2906             if (k == high || a[k] < a[k - 1]) {
2907                 --k;
2908             }
2909             if (a[k] <= value) {
2910                 break;
2911             }
2912         }
2913         a[p] = value;
2914     }
2915 
2916     /**
2917      * Tries to sort the specified range of the array.
2918      *
2919      * @param sorter parallel context
2920      * @param a the array to be sorted
2921      * @param low the index of the first element to be sorted
2922      * @param size the array size
2923      * @return true if finally sorted, false otherwise
2924      */
tryMergeRuns(Sorter sorter, float[] a, int low, int size)2925     private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) {
2926 
2927         /*
2928          * The run array is constructed only if initial runs are
2929          * long enough to continue, run[i] then holds start index
2930          * of the i-th sequence of elements in non-descending order.
2931          */
2932         int[] run = null;
2933         int high = low + size;
2934         int count = 1, last = low;
2935 
2936         /*
2937          * Identify all possible runs.
2938          */
2939         for (int k = low + 1; k < high; ) {
2940 
2941             /*
2942              * Find the end index of the current run.
2943              */
2944             if (a[k - 1] < a[k]) {
2945 
2946                 // Identify ascending sequence
2947                 while (++k < high && a[k - 1] <= a[k]);
2948 
2949             } else if (a[k - 1] > a[k]) {
2950 
2951                 // Identify descending sequence
2952                 while (++k < high && a[k - 1] >= a[k]);
2953 
2954                 // Reverse into ascending order
2955                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
2956                     float ai = a[i]; a[i] = a[j]; a[j] = ai;
2957                 }
2958             } else { // Identify constant sequence
2959                 for (float ak = a[k]; ++k < high && ak == a[k]; );
2960 
2961                 if (k < high) {
2962                     continue;
2963                 }
2964             }
2965 
2966             /*
2967              * Check special cases.
2968              */
2969             if (run == null) {
2970                 if (k == high) {
2971 
2972                     /*
2973                      * The array is monotonous sequence,
2974                      * and therefore already sorted.
2975                      */
2976                     return true;
2977                 }
2978 
2979                 if (k - low < MIN_FIRST_RUN_SIZE) {
2980 
2981                     /*
2982                      * The first run is too small
2983                      * to proceed with scanning.
2984                      */
2985                     return false;
2986                 }
2987 
2988                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
2989                 run[0] = low;
2990 
2991             } else if (a[last - 1] > a[last]) {
2992 
2993                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
2994 
2995                     /*
2996                      * The first runs are not long
2997                      * enough to continue scanning.
2998                      */
2999                     return false;
3000                 }
3001 
3002                 if (++count == MAX_RUN_CAPACITY) {
3003 
3004                     /*
3005                      * Array is not highly structured.
3006                      */
3007                     return false;
3008                 }
3009 
3010                 if (count == run.length) {
3011 
3012                     /*
3013                      * Increase capacity of index array.
3014                      */
3015                     run = Arrays.copyOf(run, count << 1);
3016                 }
3017             }
3018             run[count] = (last = k);
3019         }
3020 
3021         /*
3022          * Merge runs of highly structured array.
3023          */
3024         if (count > 1) {
3025             float[] b; int offset = low;
3026 
3027             if (sorter == null || (b = (float[]) sorter.b) == null) {
3028                 b = new float[size];
3029             } else {
3030                 offset = sorter.offset;
3031             }
3032             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
3033         }
3034         return true;
3035     }
3036 
3037     /**
3038      * Merges the specified runs.
3039      *
3040      * @param a the source array
3041      * @param b the temporary buffer used in merging
3042      * @param offset the start index in the source, inclusive
3043      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
3044      * @param parallel indicates whether merging is performed in parallel
3045      * @param run the start indexes of the runs, inclusive
3046      * @param lo the start index of the first run, inclusive
3047      * @param hi the start index of the last run, inclusive
3048      * @return the destination where runs are merged
3049      */
mergeRuns(float[] a, float[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)3050     private static float[] mergeRuns(float[] a, float[] b, int offset,
3051             int aim, boolean parallel, int[] run, int lo, int hi) {
3052 
3053         if (hi - lo == 1) {
3054             if (aim >= 0) {
3055                 return a;
3056             }
3057             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
3058                 b[--j] = a[--i]
3059             );
3060             return b;
3061         }
3062 
3063         /*
3064          * Split into approximately equal parts.
3065          */
3066         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
3067         while (run[++mi + 1] <= rmi);
3068 
3069         /*
3070          * Merge the left and right parts.
3071          */
3072         float[] a1, a2;
3073 
3074         if (parallel && hi - lo > MIN_RUN_COUNT) {
3075             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
3076             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
3077             a2 = (float[]) merger.getDestination();
3078         } else {
3079             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
3080             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
3081         }
3082 
3083         float[] dst = a1 == a ? b : a;
3084 
3085         int k   = a1 == a ? run[lo] - offset : run[lo];
3086         int lo1 = a1 == b ? run[lo] - offset : run[lo];
3087         int hi1 = a1 == b ? run[mi] - offset : run[mi];
3088         int lo2 = a2 == b ? run[mi] - offset : run[mi];
3089         int hi2 = a2 == b ? run[hi] - offset : run[hi];
3090 
3091         if (parallel) {
3092             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
3093         } else {
3094             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
3095         }
3096         return dst;
3097     }
3098 
3099     /**
3100      * Merges the sorted parts.
3101      *
3102      * @param merger parallel context
3103      * @param dst the destination where parts are merged
3104      * @param k the start index of the destination, inclusive
3105      * @param a1 the first part
3106      * @param lo1 the start index of the first part, inclusive
3107      * @param hi1 the end index of the first part, exclusive
3108      * @param a2 the second part
3109      * @param lo2 the start index of the second part, inclusive
3110      * @param hi2 the end index of the second part, exclusive
3111      */
mergeParts(Merger merger, float[] dst, int k, float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2)3112     private static void mergeParts(Merger merger, float[] dst, int k,
3113             float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) {
3114 
3115         if (merger != null && a1 == a2) {
3116 
3117             while (true) {
3118 
3119                 /*
3120                  * The first part must be larger.
3121                  */
3122                 if (hi1 - lo1 < hi2 - lo2) {
3123                     int lo = lo1; lo1 = lo2; lo2 = lo;
3124                     int hi = hi1; hi1 = hi2; hi2 = hi;
3125                 }
3126 
3127                 /*
3128                  * Small parts will be merged sequentially.
3129                  */
3130                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
3131                     break;
3132                 }
3133 
3134                 /*
3135                  * Find the median of the larger part.
3136                  */
3137                 int mi1 = (lo1 + hi1) >>> 1;
3138                 float key = a1[mi1];
3139                 int mi2 = hi2;
3140 
3141                 /*
3142                  * Partition the smaller part.
3143                  */
3144                 for (int loo = lo2; loo < mi2; ) {
3145                     int t = (loo + mi2) >>> 1;
3146 
3147                     if (key > a2[t]) {
3148                         loo = t + 1;
3149                     } else {
3150                         mi2 = t;
3151                     }
3152                 }
3153 
3154                 int d = mi2 - lo2 + mi1 - lo1;
3155 
3156                 /*
3157                  * Merge the right sub-parts in parallel.
3158                  */
3159                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
3160 
3161                 /*
3162                  * Process the sub-left parts.
3163                  */
3164                 hi1 = mi1;
3165                 hi2 = mi2;
3166             }
3167         }
3168 
3169         /*
3170          * Merge small parts sequentially.
3171          */
3172         while (lo1 < hi1 && lo2 < hi2) {
3173             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
3174         }
3175         if (dst != a1 || k < lo1) {
3176             while (lo1 < hi1) {
3177                 dst[k++] = a1[lo1++];
3178             }
3179         }
3180         if (dst != a2 || k < lo2) {
3181             while (lo2 < hi2) {
3182                 dst[k++] = a2[lo2++];
3183             }
3184         }
3185     }
3186 
3187 // [double]
3188 
3189     /**
3190      * Sorts the specified range of the array using parallel merge
3191      * sort and/or Dual-Pivot Quicksort.
3192      *
3193      * To balance the faster splitting and parallelism of merge sort
3194      * with the faster element partitioning of Quicksort, ranges are
3195      * subdivided in tiers such that, if there is enough parallelism,
3196      * the four-way parallel merge is started, still ensuring enough
3197      * parallelism to process the partitions.
3198      *
3199      * @param a the array to be sorted
3200      * @param parallelism the parallelism level
3201      * @param low the index of the first element, inclusive, to be sorted
3202      * @param high the index of the last element, exclusive, to be sorted
3203      */
3204     static void sort(double[] a, int parallelism, int low, int high) {
3205         /*
3206          * Phase 1. Count the number of negative zero -0.0d,
3207          * turn them into positive zero, and move all NaNs
3208          * to the end of the array.
3209          */
3210         int numNegativeZero = 0;
3211 
3212         for (int k = high; k > low; ) {
3213             double ak = a[--k];
3214 
3215             if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d
3216                 numNegativeZero += 1;
3217                 a[k] = 0.0d;
3218             } else if (ak != ak) { // ak is NaN
3219                 a[k] = a[--high];
3220                 a[high] = ak;
3221             }
3222         }
3223 
3224         /*
3225          * Phase 2. Sort everything except NaNs,
3226          * which are already in place.
3227          */
3228         int size = high - low;
3229 
3230         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
3231             int depth = getDepth(parallelism, size >> 12);
3232             double[] b = depth == 0 ? null : new double[size];
3233             new Sorter(null, a, b, low, size, low, depth).invoke();
3234         } else {
3235             sort(null, a, 0, low, high);
3236         }
3237 
3238         /*
3239          * Phase 3. Turn positive zero 0.0d
3240          * back into negative zero -0.0d.
3241          */
3242         if (++numNegativeZero == 1) {
3243             return;
3244         }
3245 
3246         /*
3247          * Find the position one less than
3248          * the index of the first zero.
3249          */
3250         while (low <= high) {
3251             int middle = (low + high) >>> 1;
3252 
3253             if (a[middle] < 0) {
3254                 low = middle + 1;
3255             } else {
3256                 high = middle - 1;
3257             }
3258         }
3259 
3260         /*
3261          * Replace the required number of 0.0d by -0.0d.
3262          */
3263         while (--numNegativeZero > 0) {
3264             a[++high] = -0.0d;
3265         }
3266     }
3267 
3268     /**
3269      * Sorts the specified array using the Dual-Pivot Quicksort and/or
3270      * other sorts in special-cases, possibly with parallel partitions.
3271      *
3272      * @param sorter parallel context
3273      * @param a the array to be sorted
3274      * @param bits the combination of recursion depth and bit flag, where
3275      *        the right bit "0" indicates that array is the leftmost part
3276      * @param low the index of the first element, inclusive, to be sorted
3277      * @param high the index of the last element, exclusive, to be sorted
3278      */
sort(Sorter sorter, double[] a, int bits, int low, int high)3279     static void sort(Sorter sorter, double[] a, int bits, int low, int high) {
3280         while (true) {
3281             int end = high - 1, size = high - low;
3282 
3283             /*
3284              * Run mixed insertion sort on small non-leftmost parts.
3285              */
3286             if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) {
3287                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
3288                 return;
3289             }
3290 
3291             /*
3292              * Invoke insertion sort on small leftmost part.
3293              */
3294             if (size < MAX_INSERTION_SORT_SIZE) {
3295                 insertionSort(a, low, high);
3296                 return;
3297             }
3298 
3299             /*
3300              * Check if the whole array or large non-leftmost
3301              * parts are nearly sorted and then merge runs.
3302              */
3303             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
3304                     && tryMergeRuns(sorter, a, low, size)) {
3305                 return;
3306             }
3307 
3308             /*
3309              * Switch to heap sort if execution
3310              * time is becoming quadratic.
3311              */
3312             if ((bits += DELTA) > MAX_RECURSION_DEPTH) {
3313                 heapSort(a, low, high);
3314                 return;
3315             }
3316 
3317             /*
3318              * Use an inexpensive approximation of the golden ratio
3319              * to select five sample elements and determine pivots.
3320              */
3321             int step = (size >> 3) * 3 + 3;
3322 
3323             /*
3324              * Five elements around (and including) the central element
3325              * will be used for pivot selection as described below. The
3326              * unequal choice of spacing these elements was empirically
3327              * determined to work well on a wide variety of inputs.
3328              */
3329             int e1 = low + step;
3330             int e5 = end - step;
3331             int e3 = (e1 + e5) >>> 1;
3332             int e2 = (e1 + e3) >>> 1;
3333             int e4 = (e3 + e5) >>> 1;
3334             double a3 = a[e3];
3335 
3336             /*
3337              * Sort these elements in place by the combination
3338              * of 4-element sorting network and insertion sort.
3339              *
3340              *    5 ------o-----------o------------
3341              *            |           |
3342              *    4 ------|-----o-----o-----o------
3343              *            |     |           |
3344              *    2 ------o-----|-----o-----o------
3345              *                  |     |
3346              *    1 ------------o-----o------------
3347              */
3348             if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
3349             if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
3350             if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
3351             if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
3352             if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
3353 
3354             if (a3 < a[e2]) {
3355                 if (a3 < a[e1]) {
3356                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
3357                 } else {
3358                     a[e3] = a[e2]; a[e2] = a3;
3359                 }
3360             } else if (a3 > a[e4]) {
3361                 if (a3 > a[e5]) {
3362                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
3363                 } else {
3364                     a[e3] = a[e4]; a[e4] = a3;
3365                 }
3366             }
3367 
3368             // Pointers
3369             int lower = low; // The index of the last element of the left part
3370             int upper = end; // The index of the first element of the right part
3371 
3372             /*
3373              * Partitioning with 2 pivots in case of different elements.
3374              */
3375             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
3376 
3377                 /*
3378                  * Use the first and fifth of the five sorted elements as
3379                  * the pivots. These values are inexpensive approximation
3380                  * of tertiles. Note, that pivot1 < pivot2.
3381                  */
3382                 double pivot1 = a[e1];
3383                 double pivot2 = a[e5];
3384 
3385                 /*
3386                  * The first and the last elements to be sorted are moved
3387                  * to the locations formerly occupied by the pivots. When
3388                  * partitioning is completed, the pivots are swapped back
3389                  * into their final positions, and excluded from the next
3390                  * subsequent sorting.
3391                  */
3392                 a[e1] = a[lower];
3393                 a[e5] = a[upper];
3394 
3395                 /*
3396                  * Skip elements, which are less or greater than the pivots.
3397                  */
3398                 while (a[++lower] < pivot1);
3399                 while (a[--upper] > pivot2);
3400 
3401                 /*
3402                  * Backward 3-interval partitioning
3403                  *
3404                  *   left part                 central part          right part
3405                  * +------------------------------------------------------------+
3406                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
3407                  * +------------------------------------------------------------+
3408                  *             ^       ^                            ^
3409                  *             |       |                            |
3410                  *           lower     k                          upper
3411                  *
3412                  * Invariants:
3413                  *
3414                  *              all in (low, lower] < pivot1
3415                  *    pivot1 <= all in (k, upper)  <= pivot2
3416                  *              all in [upper, end) > pivot2
3417                  *
3418                  * Pointer k is the last index of ?-part
3419                  */
3420                 for (int unused = --lower, k = ++upper; --k > lower; ) {
3421                     double ak = a[k];
3422 
3423                     if (ak < pivot1) { // Move a[k] to the left side
3424                         while (lower < k) {
3425                             if (a[++lower] >= pivot1) {
3426                                 if (a[lower] > pivot2) {
3427                                     a[k] = a[--upper];
3428                                     a[upper] = a[lower];
3429                                 } else {
3430                                     a[k] = a[lower];
3431                                 }
3432                                 a[lower] = ak;
3433                                 break;
3434                             }
3435                         }
3436                     } else if (ak > pivot2) { // Move a[k] to the right side
3437                         a[k] = a[--upper];
3438                         a[upper] = ak;
3439                     }
3440                 }
3441 
3442                 /*
3443                  * Swap the pivots into their final positions.
3444                  */
3445                 a[low] = a[lower]; a[lower] = pivot1;
3446                 a[end] = a[upper]; a[upper] = pivot2;
3447 
3448                 /*
3449                  * Sort non-left parts recursively (possibly in parallel),
3450                  * excluding known pivots.
3451                  */
3452                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
3453                     sorter.forkSorter(bits | 1, lower + 1, upper);
3454                     sorter.forkSorter(bits | 1, upper + 1, high);
3455                 } else {
3456                     sort(sorter, a, bits | 1, lower + 1, upper);
3457                     sort(sorter, a, bits | 1, upper + 1, high);
3458                 }
3459 
3460             } else { // Use single pivot in case of many equal elements
3461 
3462                 /*
3463                  * Use the third of the five sorted elements as the pivot.
3464                  * This value is inexpensive approximation of the median.
3465                  */
3466                 double pivot = a[e3];
3467 
3468                 /*
3469                  * The first element to be sorted is moved to the
3470                  * location formerly occupied by the pivot. After
3471                  * completion of partitioning the pivot is swapped
3472                  * back into its final position, and excluded from
3473                  * the next subsequent sorting.
3474                  */
3475                 a[e3] = a[lower];
3476 
3477                 /*
3478                  * Traditional 3-way (Dutch National Flag) partitioning
3479                  *
3480                  *   left part                 central part    right part
3481                  * +------------------------------------------------------+
3482                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
3483                  * +------------------------------------------------------+
3484                  *              ^           ^                ^
3485                  *              |           |                |
3486                  *            lower         k              upper
3487                  *
3488                  * Invariants:
3489                  *
3490                  *   all in (low, lower] < pivot
3491                  *   all in (k, upper)  == pivot
3492                  *   all in [upper, end] > pivot
3493                  *
3494                  * Pointer k is the last index of ?-part
3495                  */
3496                 for (int k = ++upper; --k > lower; ) {
3497                     double ak = a[k];
3498 
3499                     if (ak != pivot) {
3500                         a[k] = pivot;
3501 
3502                         if (ak < pivot) { // Move a[k] to the left side
3503                             while (a[++lower] < pivot);
3504 
3505                             if (a[lower] > pivot) {
3506                                 a[--upper] = a[lower];
3507                             }
3508                             a[lower] = ak;
3509                         } else { // ak > pivot - Move a[k] to the right side
3510                             a[--upper] = ak;
3511                         }
3512                     }
3513                 }
3514 
3515                 /*
3516                  * Swap the pivot into its final position.
3517                  */
3518                 a[low] = a[lower]; a[lower] = pivot;
3519 
3520                 /*
3521                  * Sort the right part (possibly in parallel), excluding
3522                  * known pivot. All elements from the central part are
3523                  * equal and therefore already sorted.
3524                  */
3525                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
3526                     sorter.forkSorter(bits | 1, upper, high);
3527                 } else {
3528                     sort(sorter, a, bits | 1, upper, high);
3529                 }
3530             }
3531             high = lower; // Iterate along the left part
3532         }
3533     }
3534 
3535     /**
3536      * Sorts the specified range of the array using mixed insertion sort.
3537      *
3538      * Mixed insertion sort is combination of simple insertion sort,
3539      * pin insertion sort and pair insertion sort.
3540      *
3541      * In the context of Dual-Pivot Quicksort, the pivot element
3542      * from the left part plays the role of sentinel, because it
3543      * is less than any elements from the given part. Therefore,
3544      * expensive check of the left range can be skipped on each
3545      * iteration unless it is the leftmost call.
3546      *
3547      * @param a the array to be sorted
3548      * @param low the index of the first element, inclusive, to be sorted
3549      * @param end the index of the last element for simple insertion sort
3550      * @param high the index of the last element, exclusive, to be sorted
3551      */
mixedInsertionSort(double[] a, int low, int end, int high)3552     private static void mixedInsertionSort(double[] a, int low, int end, int high) {
3553         if (end == high) {
3554 
3555             /*
3556              * Invoke simple insertion sort on tiny array.
3557              */
3558             for (int i; ++low < end; ) {
3559                 double ai = a[i = low];
3560 
3561                 while (ai < a[--i]) {
3562                     a[i + 1] = a[i];
3563                 }
3564                 a[i + 1] = ai;
3565             }
3566         } else {
3567 
3568             /*
3569              * Start with pin insertion sort on small part.
3570              *
3571              * Pin insertion sort is extended simple insertion sort.
3572              * The main idea of this sort is to put elements larger
3573              * than an element called pin to the end of array (the
3574              * proper area for such elements). It avoids expensive
3575              * movements of these elements through the whole array.
3576              */
3577             double pin = a[end];
3578 
3579             for (int i, p = high; ++low < end; ) {
3580                 double ai = a[i = low];
3581 
3582                 if (ai < a[i - 1]) { // Small element
3583 
3584                     /*
3585                      * Insert small element into sorted part.
3586                      */
3587                     a[i] = a[--i];
3588 
3589                     while (ai < a[--i]) {
3590                         a[i + 1] = a[i];
3591                     }
3592                     a[i + 1] = ai;
3593 
3594                 } else if (p > i && ai > pin) { // Large element
3595 
3596                     /*
3597                      * Find element smaller than pin.
3598                      */
3599                     while (a[--p] > pin);
3600 
3601                     /*
3602                      * Swap it with large element.
3603                      */
3604                     if (p > i) {
3605                         ai = a[p];
3606                         a[p] = a[i];
3607                     }
3608 
3609                     /*
3610                      * Insert small element into sorted part.
3611                      */
3612                     while (ai < a[--i]) {
3613                         a[i + 1] = a[i];
3614                     }
3615                     a[i + 1] = ai;
3616                 }
3617             }
3618 
3619             /*
3620              * Continue with pair insertion sort on remain part.
3621              */
3622             for (int i; low < high; ++low) {
3623                 double a1 = a[i = low], a2 = a[++low];
3624 
3625                 /*
3626                  * Insert two elements per iteration: at first, insert the
3627                  * larger element and then insert the smaller element, but
3628                  * from the position where the larger element was inserted.
3629                  */
3630                 if (a1 > a2) {
3631 
3632                     while (a1 < a[--i]) {
3633                         a[i + 2] = a[i];
3634                     }
3635                     a[++i + 1] = a1;
3636 
3637                     while (a2 < a[--i]) {
3638                         a[i + 1] = a[i];
3639                     }
3640                     a[i + 1] = a2;
3641 
3642                 } else if (a1 < a[i - 1]) {
3643 
3644                     while (a2 < a[--i]) {
3645                         a[i + 2] = a[i];
3646                     }
3647                     a[++i + 1] = a2;
3648 
3649                     while (a1 < a[--i]) {
3650                         a[i + 1] = a[i];
3651                     }
3652                     a[i + 1] = a1;
3653                 }
3654             }
3655         }
3656     }
3657 
3658     /**
3659      * Sorts the specified range of the array using insertion sort.
3660      *
3661      * @param a the array to be sorted
3662      * @param low the index of the first element, inclusive, to be sorted
3663      * @param high the index of the last element, exclusive, to be sorted
3664      */
insertionSort(double[] a, int low, int high)3665     private static void insertionSort(double[] a, int low, int high) {
3666         for (int i, k = low; ++k < high; ) {
3667             double ai = a[i = k];
3668 
3669             if (ai < a[i - 1]) {
3670                 while (--i >= low && ai < a[i]) {
3671                     a[i + 1] = a[i];
3672                 }
3673                 a[i + 1] = ai;
3674             }
3675         }
3676     }
3677 
3678     /**
3679      * Sorts the specified range of the array using heap sort.
3680      *
3681      * @param a the array to be sorted
3682      * @param low the index of the first element, inclusive, to be sorted
3683      * @param high the index of the last element, exclusive, to be sorted
3684      */
heapSort(double[] a, int low, int high)3685     private static void heapSort(double[] a, int low, int high) {
3686         for (int k = (low + high) >>> 1; k > low; ) {
3687             pushDown(a, --k, a[k], low, high);
3688         }
3689         while (--high > low) {
3690             double max = a[low];
3691             pushDown(a, low, a[high], low, high);
3692             a[high] = max;
3693         }
3694     }
3695 
3696     /**
3697      * Pushes specified element down during heap sort.
3698      *
3699      * @param a the given array
3700      * @param p the start index
3701      * @param value the given element
3702      * @param low the index of the first element, inclusive, to be sorted
3703      * @param high the index of the last element, exclusive, to be sorted
3704      */
pushDown(double[] a, int p, double value, int low, int high)3705     private static void pushDown(double[] a, int p, double value, int low, int high) {
3706         for (int k ;; a[p] = a[p = k]) {
3707             k = (p << 1) - low + 2; // Index of the right child
3708 
3709             if (k > high) {
3710                 break;
3711             }
3712             if (k == high || a[k] < a[k - 1]) {
3713                 --k;
3714             }
3715             if (a[k] <= value) {
3716                 break;
3717             }
3718         }
3719         a[p] = value;
3720     }
3721 
3722     /**
3723      * Tries to sort the specified range of the array.
3724      *
3725      * @param sorter parallel context
3726      * @param a the array to be sorted
3727      * @param low the index of the first element to be sorted
3728      * @param size the array size
3729      * @return true if finally sorted, false otherwise
3730      */
tryMergeRuns(Sorter sorter, double[] a, int low, int size)3731     private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) {
3732 
3733         /*
3734          * The run array is constructed only if initial runs are
3735          * long enough to continue, run[i] then holds start index
3736          * of the i-th sequence of elements in non-descending order.
3737          */
3738         int[] run = null;
3739         int high = low + size;
3740         int count = 1, last = low;
3741 
3742         /*
3743          * Identify all possible runs.
3744          */
3745         for (int k = low + 1; k < high; ) {
3746 
3747             /*
3748              * Find the end index of the current run.
3749              */
3750             if (a[k - 1] < a[k]) {
3751 
3752                 // Identify ascending sequence
3753                 while (++k < high && a[k - 1] <= a[k]);
3754 
3755             } else if (a[k - 1] > a[k]) {
3756 
3757                 // Identify descending sequence
3758                 while (++k < high && a[k - 1] >= a[k]);
3759 
3760                 // Reverse into ascending order
3761                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
3762                     double ai = a[i]; a[i] = a[j]; a[j] = ai;
3763                 }
3764             } else { // Identify constant sequence
3765                 for (double ak = a[k]; ++k < high && ak == a[k]; );
3766 
3767                 if (k < high) {
3768                     continue;
3769                 }
3770             }
3771 
3772             /*
3773              * Check special cases.
3774              */
3775             if (run == null) {
3776                 if (k == high) {
3777 
3778                     /*
3779                      * The array is monotonous sequence,
3780                      * and therefore already sorted.
3781                      */
3782                     return true;
3783                 }
3784 
3785                 if (k - low < MIN_FIRST_RUN_SIZE) {
3786 
3787                     /*
3788                      * The first run is too small
3789                      * to proceed with scanning.
3790                      */
3791                     return false;
3792                 }
3793 
3794                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
3795                 run[0] = low;
3796 
3797             } else if (a[last - 1] > a[last]) {
3798 
3799                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
3800 
3801                     /*
3802                      * The first runs are not long
3803                      * enough to continue scanning.
3804                      */
3805                     return false;
3806                 }
3807 
3808                 if (++count == MAX_RUN_CAPACITY) {
3809 
3810                     /*
3811                      * Array is not highly structured.
3812                      */
3813                     return false;
3814                 }
3815 
3816                 if (count == run.length) {
3817 
3818                     /*
3819                      * Increase capacity of index array.
3820                      */
3821                     run = Arrays.copyOf(run, count << 1);
3822                 }
3823             }
3824             run[count] = (last = k);
3825         }
3826 
3827         /*
3828          * Merge runs of highly structured array.
3829          */
3830         if (count > 1) {
3831             double[] b; int offset = low;
3832 
3833             if (sorter == null || (b = (double[]) sorter.b) == null) {
3834                 b = new double[size];
3835             } else {
3836                 offset = sorter.offset;
3837             }
3838             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
3839         }
3840         return true;
3841     }
3842 
3843     /**
3844      * Merges the specified runs.
3845      *
3846      * @param a the source array
3847      * @param b the temporary buffer used in merging
3848      * @param offset the start index in the source, inclusive
3849      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
3850      * @param parallel indicates whether merging is performed in parallel
3851      * @param run the start indexes of the runs, inclusive
3852      * @param lo the start index of the first run, inclusive
3853      * @param hi the start index of the last run, inclusive
3854      * @return the destination where runs are merged
3855      */
mergeRuns(double[] a, double[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi)3856     private static double[] mergeRuns(double[] a, double[] b, int offset,
3857             int aim, boolean parallel, int[] run, int lo, int hi) {
3858 
3859         if (hi - lo == 1) {
3860             if (aim >= 0) {
3861                 return a;
3862             }
3863             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
3864                 b[--j] = a[--i]
3865             );
3866             return b;
3867         }
3868 
3869         /*
3870          * Split into approximately equal parts.
3871          */
3872         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
3873         while (run[++mi + 1] <= rmi);
3874 
3875         /*
3876          * Merge the left and right parts.
3877          */
3878         double[] a1, a2;
3879 
3880         if (parallel && hi - lo > MIN_RUN_COUNT) {
3881             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
3882             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
3883             a2 = (double[]) merger.getDestination();
3884         } else {
3885             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
3886             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
3887         }
3888 
3889         double[] dst = a1 == a ? b : a;
3890 
3891         int k   = a1 == a ? run[lo] - offset : run[lo];
3892         int lo1 = a1 == b ? run[lo] - offset : run[lo];
3893         int hi1 = a1 == b ? run[mi] - offset : run[mi];
3894         int lo2 = a2 == b ? run[mi] - offset : run[mi];
3895         int hi2 = a2 == b ? run[hi] - offset : run[hi];
3896 
3897         if (parallel) {
3898             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
3899         } else {
3900             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
3901         }
3902         return dst;
3903     }
3904 
3905     /**
3906      * Merges the sorted parts.
3907      *
3908      * @param merger parallel context
3909      * @param dst the destination where parts are merged
3910      * @param k the start index of the destination, inclusive
3911      * @param a1 the first part
3912      * @param lo1 the start index of the first part, inclusive
3913      * @param hi1 the end index of the first part, exclusive
3914      * @param a2 the second part
3915      * @param lo2 the start index of the second part, inclusive
3916      * @param hi2 the end index of the second part, exclusive
3917      */
mergeParts(Merger merger, double[] dst, int k, double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2)3918     private static void mergeParts(Merger merger, double[] dst, int k,
3919             double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) {
3920 
3921         if (merger != null && a1 == a2) {
3922 
3923             while (true) {
3924 
3925                 /*
3926                  * The first part must be larger.
3927                  */
3928                 if (hi1 - lo1 < hi2 - lo2) {
3929                     int lo = lo1; lo1 = lo2; lo2 = lo;
3930                     int hi = hi1; hi1 = hi2; hi2 = hi;
3931                 }
3932 
3933                 /*
3934                  * Small parts will be merged sequentially.
3935                  */
3936                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
3937                     break;
3938                 }
3939 
3940                 /*
3941                  * Find the median of the larger part.
3942                  */
3943                 int mi1 = (lo1 + hi1) >>> 1;
3944                 double key = a1[mi1];
3945                 int mi2 = hi2;
3946 
3947                 /*
3948                  * Partition the smaller part.
3949                  */
3950                 for (int loo = lo2; loo < mi2; ) {
3951                     int t = (loo + mi2) >>> 1;
3952 
3953                     if (key > a2[t]) {
3954                         loo = t + 1;
3955                     } else {
3956                         mi2 = t;
3957                     }
3958                 }
3959 
3960                 int d = mi2 - lo2 + mi1 - lo1;
3961 
3962                 /*
3963                  * Merge the right sub-parts in parallel.
3964                  */
3965                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
3966 
3967                 /*
3968                  * Process the sub-left parts.
3969                  */
3970                 hi1 = mi1;
3971                 hi2 = mi2;
3972             }
3973         }
3974 
3975         /*
3976          * Merge small parts sequentially.
3977          */
3978         while (lo1 < hi1 && lo2 < hi2) {
3979             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
3980         }
3981         if (dst != a1 || k < lo1) {
3982             while (lo1 < hi1) {
3983                 dst[k++] = a1[lo1++];
3984             }
3985         }
3986         if (dst != a2 || k < lo2) {
3987             while (lo2 < hi2) {
3988                 dst[k++] = a2[lo2++];
3989             }
3990         }
3991     }
3992 
3993 // [class]
3994 
3995     /**
3996      * This class implements parallel sorting.
3997      */
3998     private static final class Sorter extends CountedCompleter<Void> {
3999         private static final long serialVersionUID = 20180818L;
4000         private final Object a, b;
4001         private final int low, size, offset, depth;
4002 
4003         private Sorter(CountedCompleter<?> parent,
4004                 Object a, Object b, int low, int size, int offset, int depth) {
4005             super(parent);
4006             this.a = a;
4007             this.b = b;
4008             this.low = low;
4009             this.size = size;
4010             this.offset = offset;
4011             this.depth = depth;
4012         }
4013 
4014         @Override
4015         public final void compute() {
4016             if (depth < 0) {
4017                 setPendingCount(2);
4018                 int half = size >> 1;
4019                 new Sorter(this, b, a, low, half, offset, depth + 1).fork();
4020                 new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute();
4021             } else {
4022                 if (a instanceof int[]) {
4023                     sort(this, (int[]) a, depth, low, low + size);
4024                 } else if (a instanceof long[]) {
4025                     sort(this, (long[]) a, depth, low, low + size);
4026                 } else if (a instanceof float[]) {
4027                     sort(this, (float[]) a, depth, low, low + size);
4028                 } else if (a instanceof double[]) {
4029                     sort(this, (double[]) a, depth, low, low + size);
4030                 } else {
4031                     throw new IllegalArgumentException(
4032                         "Unknown type of array: " + a.getClass().getName());
4033                 }
4034             }
4035             tryComplete();
4036         }
4037 
4038         @Override
4039         public final void onCompletion(CountedCompleter<?> caller) {
4040             if (depth < 0) {
4041                 int mi = low + (size >> 1);
4042                 boolean src = (depth & 1) == 0;
4043 
4044                 new Merger(null,
4045                     a,
4046                     src ? low : low - offset,
4047                     b,
4048                     src ? low - offset : low,
4049                     src ? mi - offset : mi,
4050                     b,
4051                     src ? mi - offset : mi,
4052                     src ? low + size - offset : low + size
4053                 ).invoke();
4054             }
4055         }
4056 
4057         private void forkSorter(int depth, int low, int high) {
4058             addToPendingCount(1);
4059             Object a = this.a; // Use local variable for performance
4060             new Sorter(this, a, b, low, high - low, offset, depth).fork();
4061         }
4062     }
4063 
4064     /**
4065      * This class implements parallel merging.
4066      */
4067     private static final class Merger extends CountedCompleter<Void> {
4068         private static final long serialVersionUID = 20180818L;
4069         private final Object dst, a1, a2;
4070         private final int k, lo1, hi1, lo2, hi2;
4071 
4072         private Merger(CountedCompleter<?> parent, Object dst, int k,
4073                 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) {
4074             super(parent);
4075             this.dst = dst;
4076             this.k = k;
4077             this.a1 = a1;
4078             this.lo1 = lo1;
4079             this.hi1 = hi1;
4080             this.a2 = a2;
4081             this.lo2 = lo2;
4082             this.hi2 = hi2;
4083         }
4084 
4085         @Override
4086         public final void compute() {
4087             if (dst instanceof int[]) {
4088                 mergeParts(this, (int[]) dst, k,
4089                     (int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2);
4090             } else if (dst instanceof long[]) {
4091                 mergeParts(this, (long[]) dst, k,
4092                     (long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2);
4093             } else if (dst instanceof float[]) {
4094                 mergeParts(this, (float[]) dst, k,
4095                     (float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2);
4096             } else if (dst instanceof double[]) {
4097                 mergeParts(this, (double[]) dst, k,
4098                     (double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2);
4099             } else {
4100                 throw new IllegalArgumentException(
4101                     "Unknown type of array: " + dst.getClass().getName());
4102             }
4103             propagateCompletion();
4104         }
4105 
4106         private void forkMerger(Object dst, int k,
4107                 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) {
4108             addToPendingCount(1);
4109             new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork();
4110         }
4111     }
4112 
4113     /**
4114      * This class implements parallel merging of runs.
4115      */
4116     private static final class RunMerger extends RecursiveTask<Object> {
4117         private static final long serialVersionUID = 20180818L;
4118         private final Object a, b;
4119         private final int[] run;
4120         private final int offset, aim, lo, hi;
4121 
4122         private RunMerger(Object a, Object b, int offset,
4123                 int aim, int[] run, int lo, int hi) {
4124             this.a = a;
4125             this.b = b;
4126             this.offset = offset;
4127             this.aim = aim;
4128             this.run = run;
4129             this.lo = lo;
4130             this.hi = hi;
4131         }
4132 
4133         @Override
4134         protected final Object compute() {
4135             if (a instanceof int[]) {
4136                 return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi);
4137             }
4138             if (a instanceof long[]) {
4139                 return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi);
4140             }
4141             if (a instanceof float[]) {
4142                 return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi);
4143             }
4144             if (a instanceof double[]) {
4145                 return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi);
4146             }
4147             throw new IllegalArgumentException(
4148                 "Unknown type of array: " + a.getClass().getName());
4149         }
4150 
4151         private RunMerger forkMe() {
4152             fork();
4153             return this;
4154         }
4155 
4156         private Object getDestination() {
4157             join();
4158             return getRawResult();
4159         }
4160     }
4161 }
4162