1 /* 2 * Copyright (C) 2014 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.math; 18 19 import com.google.common.collect.ImmutableMap; 20 import java.math.RoundingMode; 21 import java.util.Arrays; 22 import java.util.Collection; 23 import java.util.Map; 24 25 /** 26 * Enumerates several algorithms providing equivalent functionality to {@link Quantiles}, for use in 27 * {@link QuantilesBenchmark}. These algorithms each calculate either a single quantile or multiple 28 * quantiles. All algorithms modify the dataset they are given (the cost of a copy to avoid this 29 * will be constant across algorithms). 30 * 31 * @author Pete Gillin 32 * @since 20.0 33 */ 34 enum QuantilesAlgorithm { 35 36 /** 37 * Sorts the dataset, and picks values from it. When computing multiple quantiles, we sort once 38 * and pick multiple values. 39 */ 40 SORTING { 41 42 @Override singleQuantile(int index, int scale, double[] dataset)43 double singleQuantile(int index, int scale, double[] dataset) { 44 Arrays.sort(dataset); 45 return singleQuantileFromSorted(index, scale, dataset); 46 } 47 48 @Override multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)49 Map<Integer, Double> multipleQuantiles( 50 Collection<Integer> indexes, int scale, double[] dataset) { 51 Arrays.sort(dataset); 52 ImmutableMap.Builder<Integer, Double> builder = ImmutableMap.builder(); 53 for (int index : indexes) { 54 builder.put(index, singleQuantileFromSorted(index, scale, dataset)); 55 } 56 return builder.build(); 57 } 58 singleQuantileFromSorted(int index, int scale, double[] dataset)59 private double singleQuantileFromSorted(int index, int scale, double[] dataset) { 60 long numerator = (long) index * (dataset.length - 1); 61 int positionFloor = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN); 62 int remainder = (int) (numerator - positionFloor * scale); 63 if (remainder == 0) { 64 return dataset[positionFloor]; 65 } else { 66 double positionFrac = (double) remainder / scale; 67 return dataset[positionFloor] 68 + positionFrac * (dataset[positionFloor + 1] - dataset[positionFloor]); 69 } 70 } 71 }, 72 73 /** 74 * Uses quickselect. When calculating multiple quantiles, each quickselect starts from scratch. 75 */ 76 QUICKSELECT { 77 78 @Override singleQuantile(int index, int scale, double[] dataset)79 double singleQuantile(int index, int scale, double[] dataset) { 80 long numerator = (long) index * (dataset.length - 1); 81 int positionFloor = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN); 82 int remainder = (int) (numerator - positionFloor * scale); 83 double percentileFloor = select(positionFloor, dataset); 84 if (remainder == 0) { 85 return percentileFloor; 86 } else { 87 double percentileCeiling = getMinValue(dataset, positionFloor + 1); 88 double positionFrac = (double) remainder / scale; 89 return percentileFloor + positionFrac * (percentileCeiling - percentileFloor); 90 } 91 } 92 93 @Override multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)94 Map<Integer, Double> multipleQuantiles( 95 Collection<Integer> indexes, int scale, double[] dataset) { 96 ImmutableMap.Builder<Integer, Double> builder = ImmutableMap.builder(); 97 for (int index : indexes) { 98 builder.put(index, singleQuantile(index, scale, dataset)); 99 } 100 return builder.build(); 101 } 102 }, 103 104 /** Uses {@link Quantiles}. */ 105 TARGET { 106 107 @Override singleQuantile(int index, int scale, double[] dataset)108 double singleQuantile(int index, int scale, double[] dataset) { 109 return Quantiles.scale(scale).index(index).computeInPlace(dataset); 110 } 111 112 @Override multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)113 Map<Integer, Double> multipleQuantiles( 114 Collection<Integer> indexes, int scale, double[] dataset) { 115 return Quantiles.scale(scale).indexes(indexes).computeInPlace(dataset); 116 } 117 }, 118 ; 119 120 /** 121 * Calculates a single quantile. Equivalent to {@code 122 * Quantiles.scale(scale).index(index).computeInPlace(dataset)}. 123 */ singleQuantile(int index, int scale, double[] dataset)124 abstract double singleQuantile(int index, int scale, double[] dataset); 125 126 /** 127 * Calculates multiple quantiles. Equivalent to {@code 128 * Quantiles.scale(scale).indexes(indexes).computeInPlace(dataset)}. 129 */ multipleQuantiles( Collection<Integer> indexes, int scale, double[] dataset)130 abstract Map<Integer, Double> multipleQuantiles( 131 Collection<Integer> indexes, int scale, double[] dataset); 132 getMinValue(double[] array, int from)133 static double getMinValue(double[] array, int from) { 134 // This is basically a copy of com.google.math.Rank#getMinValue, with a small change in the 135 // method signature: we always search to the end of the array. 136 int min = from; 137 for (int i = from + 1; i < array.length; i++) { 138 if (array[min] > array[i]) { 139 min = i; 140 } 141 } 142 return array[min]; 143 } 144 select(int k, double[] array)145 static double select(int k, double[] array) { 146 // This is basically a copy of com.google.math.Rank#select, with a small change in the method 147 // signature: we make k 0-based rather than 1-based; and we drop from and to, and always work on 148 // the whole array. 149 int from = 0; 150 int to = array.length - 1; 151 152 while (true) { 153 if (to <= from + 1) { 154 // Two or less elements left. 155 if (to == from + 1 && array[to] < array[from]) { 156 // Exactly two elements left. 157 swap(array, from, to); 158 } 159 return array[k]; 160 } else { 161 int midIndex = (from + to) >>> 1; 162 // Choose the median of the elements at the from, to and mid indexes, 163 // and rearrange so that array[from]<=array[from+1], and 164 // array[to] => array[from + 1]. 165 166 swap(array, midIndex, from + 1); 167 168 if (array[from] > array[to]) { 169 swap(array, from, to); 170 } 171 if (array[from + 1] > array[to]) { 172 swap(array, from + 1, to); 173 } 174 if (array[from] > array[from + 1]) { 175 swap(array, from, from + 1); 176 } 177 178 // Perform a partition with the selected median. 179 int low = from + 1, high = to; // Indexes for partitioning. 180 double partition = array[from + 1]; // Choose partitioning element. 181 while (true) { 182 // Skip the elements smaller than the partition. 183 do { 184 low++; 185 } while (array[low] < partition); 186 187 // Skip the elements larger than the partition. 188 do { 189 high--; 190 } while (array[high] > partition); 191 if (high < low) { 192 break; // Pointers crossed. Partitioning complete. 193 } 194 swap(array, low, high); // End of innermost loop. 195 } 196 array[from + 1] = array[high]; // Insert partitioning element. 197 array[high] = partition; 198 199 // Continue the partition that contains the kth element. 200 if (high >= k) { 201 to = high - 1; 202 } 203 if (high <= k) { 204 from = low; 205 } 206 } 207 } 208 } 209 swap(double[] array, int i, int j)210 private static void swap(double[] array, int i, int j) { 211 // This is a copy of com.google.math.Rank#swap. 212 double temp = array[i]; 213 array[i] = array[j]; 214 array[j] = temp; 215 } 216 } 217