1 /*
2  * Copyright (C) 2014 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.math;
18 
19 import com.google.common.collect.ImmutableMap;
20 import java.math.RoundingMode;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.Map;
24 
25 /**
26  * Enumerates several algorithms providing equivalent functionality to {@link Quantiles}, for use in
27  * {@link QuantilesBenchmark}. These algorithms each calculate either a single quantile or multiple
28  * quantiles. All algorithms modify the dataset they are given (the cost of a copy to avoid this
29  * will be constant across algorithms).
30  *
31  * @author Pete Gillin
32  * @since 20.0
33  */
34 enum QuantilesAlgorithm {
35 
36   /**
37    * Sorts the dataset, and picks values from it. When computing multiple quantiles, we sort once
38    * and pick multiple values.
39    */
40   SORTING {
41 
42     @Override
singleQuantile(int index, int scale, double[] dataset)43     double singleQuantile(int index, int scale, double[] dataset) {
44       Arrays.sort(dataset);
45       return singleQuantileFromSorted(index, scale, dataset);
46     }
47 
48     @Override
multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)49     Map<Integer, Double> multipleQuantiles(
50         Collection<Integer> indexes, int scale, double[] dataset) {
51       Arrays.sort(dataset);
52       ImmutableMap.Builder<Integer, Double> builder = ImmutableMap.builder();
53       for (int index : indexes) {
54         builder.put(index, singleQuantileFromSorted(index, scale, dataset));
55       }
56       return builder.build();
57     }
58 
singleQuantileFromSorted(int index, int scale, double[] dataset)59     private double singleQuantileFromSorted(int index, int scale, double[] dataset) {
60       long numerator = (long) index * (dataset.length - 1);
61       int positionFloor = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
62       int remainder = (int) (numerator - positionFloor * scale);
63       if (remainder == 0) {
64         return dataset[positionFloor];
65       } else {
66         double positionFrac = (double) remainder / scale;
67         return dataset[positionFloor]
68             + positionFrac * (dataset[positionFloor + 1] - dataset[positionFloor]);
69       }
70     }
71   },
72 
73   /**
74    * Uses quickselect. When calculating multiple quantiles, each quickselect starts from scratch.
75    */
76   QUICKSELECT {
77 
78     @Override
singleQuantile(int index, int scale, double[] dataset)79     double singleQuantile(int index, int scale, double[] dataset) {
80       long numerator = (long) index * (dataset.length - 1);
81       int positionFloor = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
82       int remainder = (int) (numerator - positionFloor * scale);
83       double percentileFloor = select(positionFloor, dataset);
84       if (remainder == 0) {
85         return percentileFloor;
86       } else {
87         double percentileCeiling = getMinValue(dataset, positionFloor + 1);
88         double positionFrac = (double) remainder / scale;
89         return percentileFloor + positionFrac * (percentileCeiling - percentileFloor);
90       }
91     }
92 
93     @Override
multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)94     Map<Integer, Double> multipleQuantiles(
95         Collection<Integer> indexes, int scale, double[] dataset) {
96       ImmutableMap.Builder<Integer, Double> builder = ImmutableMap.builder();
97       for (int index : indexes) {
98         builder.put(index, singleQuantile(index, scale, dataset));
99       }
100       return builder.build();
101     }
102   },
103 
104   /** Uses {@link Quantiles}. */
105   TARGET {
106 
107     @Override
singleQuantile(int index, int scale, double[] dataset)108     double singleQuantile(int index, int scale, double[] dataset) {
109       return Quantiles.scale(scale).index(index).computeInPlace(dataset);
110     }
111 
112     @Override
multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)113     Map<Integer, Double> multipleQuantiles(
114         Collection<Integer> indexes, int scale, double[] dataset) {
115       return Quantiles.scale(scale).indexes(indexes).computeInPlace(dataset);
116     }
117   },
118   ;
119 
120   /**
121    * Calculates a single quantile. Equivalent to {@code
122    * Quantiles.scale(scale).index(index).computeInPlace(dataset)}.
123    */
singleQuantile(int index, int scale, double[] dataset)124   abstract double singleQuantile(int index, int scale, double[] dataset);
125 
126   /**
127    * Calculates multiple quantiles. Equivalent to {@code
128    * Quantiles.scale(scale).indexes(indexes).computeInPlace(dataset)}.
129    */
multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)130   abstract Map<Integer, Double> multipleQuantiles(
131       Collection<Integer> indexes, int scale, double[] dataset);
132 
getMinValue(double[] array, int from)133   static double getMinValue(double[] array, int from) {
134     // This is basically a copy of com.google.math.Rank#getMinValue, with a small change in the
135     // method signature: we always search to the end of the array.
136     int min = from;
137     for (int i = from + 1; i < array.length; i++) {
138       if (array[min] > array[i]) {
139         min = i;
140       }
141     }
142     return array[min];
143   }
144 
select(int k, double[] array)145   static double select(int k, double[] array) {
146     // This is basically a copy of com.google.math.Rank#select, with a small change in the method
147     // signature: we make k 0-based rather than 1-based; and we drop from and to, and always work on
148     // the whole array.
149     int from = 0;
150     int to = array.length - 1;
151 
152     while (true) {
153       if (to <= from + 1) {
154         // Two or less elements left.
155         if (to == from + 1 && array[to] < array[from]) {
156           // Exactly two elements left.
157           swap(array, from, to);
158         }
159         return array[k];
160       } else {
161         int midIndex = (from + to) >>> 1;
162         // Choose the median of the elements at the from, to and mid indexes,
163         // and rearrange so that array[from]<=array[from+1], and
164         // array[to] => array[from + 1].
165 
166         swap(array, midIndex, from + 1);
167 
168         if (array[from] > array[to]) {
169           swap(array, from, to);
170         }
171         if (array[from + 1] > array[to]) {
172           swap(array, from + 1, to);
173         }
174         if (array[from] > array[from + 1]) {
175           swap(array, from, from + 1);
176         }
177 
178         // Perform a partition with the selected median.
179         int low = from + 1, high = to; // Indexes for partitioning.
180         double partition = array[from + 1]; // Choose partitioning element.
181         while (true) {
182           // Skip the elements smaller than the partition.
183           do {
184             low++;
185           } while (array[low] < partition);
186 
187           // Skip the elements larger than the partition.
188           do {
189             high--;
190           } while (array[high] > partition);
191           if (high < low) {
192             break; // Pointers crossed. Partitioning complete.
193           }
194           swap(array, low, high); // End of innermost loop.
195         }
196         array[from + 1] = array[high]; // Insert partitioning element.
197         array[high] = partition;
198 
199         // Continue the partition that contains the kth element.
200         if (high >= k) {
201           to = high - 1;
202         }
203         if (high <= k) {
204           from = low;
205         }
206       }
207     }
208   }
209 
swap(double[] array, int i, int j)210   private static void swap(double[] array, int i, int j) {
211     // This is a copy of com.google.math.Rank#swap.
212     double temp = array[i];
213     array[i] = array[j];
214     array[j] = temp;
215   }
216 }
217