1 /* 2 * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package java.util.stream; 26 27 import java.util.Collections; 28 import java.util.EnumSet; 29 import java.util.Objects; 30 import java.util.Set; 31 import java.util.function.BiConsumer; 32 import java.util.function.BinaryOperator; 33 import java.util.function.Function; 34 import java.util.function.Supplier; 35 36 // A compilation test for the code snippets in this class-level javadoc can be found at: 37 // test/jdk/java/util/stream/test/org/openjdk/tests/java/util/stream/CollectorExample.java 38 // The test needs to be updated if the examples in this javadoc change or new examples are added. 39 40 /** 41 * A <a href="package-summary.html#Reduction">mutable reduction operation</a> that 42 * accumulates input elements into a mutable result container, optionally transforming 43 * the accumulated result into a final representation after all input elements 44 * have been processed. Reduction operations can be performed either sequentially 45 * or in parallel. 46 * 47 * <p>Examples of mutable reduction operations include: 48 * accumulating elements into a {@code Collection}; concatenating 49 * strings using a {@code StringBuilder}; computing summary information about 50 * elements such as sum, min, max, or average; computing "pivot table" summaries 51 * such as "maximum valued transaction by seller", etc. The class {@link Collectors} 52 * provides implementations of many common mutable reductions. 53 * 54 * <p>A {@code Collector} is specified by four functions that work together to 55 * accumulate entries into a mutable result container, and optionally perform 56 * a final transform on the result. They are: <ul> 57 * <li>creation of a new result container ({@link #supplier()})</li> 58 * <li>incorporating a new data element into a result container ({@link #accumulator()})</li> 59 * <li>combining two result containers into one ({@link #combiner()})</li> 60 * <li>performing an optional final transform on the container ({@link #finisher()})</li> 61 * </ul> 62 * 63 * <p>Collectors also have a set of characteristics, such as 64 * {@link Characteristics#CONCURRENT}, that provide hints that can be used by a 65 * reduction implementation to provide better performance. 66 * 67 * <p>A sequential implementation of a reduction using a collector would 68 * create a single result container using the supplier function, and invoke the 69 * accumulator function once for each input element. A parallel implementation 70 * would partition the input, create a result container for each partition, 71 * accumulate the contents of each partition into a subresult for that partition, 72 * and then use the combiner function to merge the subresults into a combined 73 * result. 74 * 75 * <p>To ensure that sequential and parallel executions produce equivalent 76 * results, the collector functions must satisfy an <em>identity</em> and an 77 * <a href="package-summary.html#Associativity">associativity</a> constraints. 78 * 79 * <p>The identity constraint says that for any partially accumulated result, 80 * combining it with an empty result container must produce an equivalent 81 * result. That is, for a partially accumulated result {@code a} that is the 82 * result of any series of accumulator and combiner invocations, {@code a} must 83 * be equivalent to {@code combiner.apply(a, supplier.get())}. 84 * 85 * <p>The associativity constraint says that splitting the computation must 86 * produce an equivalent result. That is, for any input elements {@code t1} 87 * and {@code t2}, the results {@code r1} and {@code r2} in the computation 88 * below must be equivalent: 89 * <pre>{@code 90 * A a1 = supplier.get(); 91 * accumulator.accept(a1, t1); 92 * accumulator.accept(a1, t2); 93 * R r1 = finisher.apply(a1); // result without splitting 94 * 95 * A a2 = supplier.get(); 96 * accumulator.accept(a2, t1); 97 * A a3 = supplier.get(); 98 * accumulator.accept(a3, t2); 99 * R r2 = finisher.apply(combiner.apply(a2, a3)); // result with splitting 100 * } </pre> 101 * 102 * <p>For collectors that do not have the {@code UNORDERED} characteristic, 103 * two accumulated results {@code a1} and {@code a2} are equivalent if 104 * {@code finisher.apply(a1).equals(finisher.apply(a2))}. For unordered 105 * collectors, equivalence is relaxed to allow for non-equality related to 106 * differences in order. (For example, an unordered collector that accumulated 107 * elements to a {@code List} would consider two lists equivalent if they 108 * contained the same elements, ignoring order.) 109 * 110 * <p>Libraries that implement reduction based on {@code Collector}, such as 111 * {@link Stream#collect(Collector)}, must adhere to the following constraints: 112 * <ul> 113 * <li>The first argument passed to the accumulator function, both 114 * arguments passed to the combiner function, and the argument passed to the 115 * finisher function must be the result of a previous invocation of the 116 * result supplier, accumulator, or combiner functions.</li> 117 * <li>The implementation should not do anything with the result of any of 118 * the result supplier, accumulator, or combiner functions other than to 119 * pass them again to the accumulator, combiner, or finisher functions, 120 * or return them to the caller of the reduction operation.</li> 121 * <li>If a result is passed to the combiner or finisher 122 * function, and the same object is not returned from that function, it is 123 * never used again.</li> 124 * <li>Once a result is passed to the combiner or finisher function, it 125 * is never passed to the accumulator function again.</li> 126 * <li>For non-concurrent collectors, any result returned from the result 127 * supplier, accumulator, or combiner functions must be serially 128 * thread-confined. This enables collection to occur in parallel without 129 * the {@code Collector} needing to implement any additional synchronization. 130 * The reduction implementation must manage that the input is properly 131 * partitioned, that partitions are processed in isolation, and combining 132 * happens only after accumulation is complete.</li> 133 * <li>For concurrent collectors, an implementation is free to (but not 134 * required to) implement reduction concurrently. A concurrent reduction 135 * is one where the accumulator function is called concurrently from 136 * multiple threads, using the same concurrently-modifiable result container, 137 * rather than keeping the result isolated during accumulation. 138 * A concurrent reduction should only be applied if the collector has the 139 * {@link Characteristics#UNORDERED} characteristics or if the 140 * originating data is unordered.</li> 141 * </ul> 142 * 143 * <p>In addition to the predefined implementations in {@link Collectors}, the 144 * static factory methods {@link #of(Supplier, BiConsumer, BinaryOperator, Characteristics...)} 145 * can be used to construct collectors. For example, you could create a collector 146 * that accumulates widgets into a {@code TreeSet} with: 147 * 148 * <pre>{@code 149 * Collector<Widget, ?, TreeSet<Widget>> intoSet = 150 * Collector.of(TreeSet::new, TreeSet::add, 151 * (left, right) -> { left.addAll(right); return left; }); 152 * }</pre> 153 * 154 * (This behavior is also implemented by the predefined collector 155 * {@link Collectors#toCollection(Supplier)}). 156 * 157 * @apiNote 158 * Performing a reduction operation with a {@code Collector} should produce a 159 * result equivalent to: 160 * <pre>{@code 161 * A container = collector.supplier().get(); 162 * for (T t : data) 163 * collector.accumulator().accept(container, t); 164 * return collector.finisher().apply(container); 165 * }</pre> 166 * 167 * <p>However, the library is free to partition the input, perform the reduction 168 * on the partitions, and then use the combiner function to combine the partial 169 * results to achieve a parallel reduction. (Depending on the specific reduction 170 * operation, this may perform better or worse, depending on the relative cost 171 * of the accumulator and combiner functions.) 172 * 173 * <p>Collectors are designed to be <em>composed</em>; many of the methods 174 * in {@link Collectors} are functions that take a collector and produce 175 * a new collector. For example, given the following collector that computes 176 * the sum of the salaries of a stream of employees: 177 * 178 * <pre>{@code 179 * Collector<Employee, ?, Integer> summingSalaries 180 * = Collectors.summingInt(Employee::getSalary)) 181 * }</pre> 182 * 183 * If we wanted to create a collector to tabulate the sum of salaries by 184 * department, we could reuse the "sum of salaries" logic using 185 * {@link Collectors#groupingBy(Function, Collector)}: 186 * 187 * <pre>{@code 188 * Collector<Employee, ?, Map<Department, Integer>> summingSalariesByDept 189 * = Collectors.groupingBy(Employee::getDepartment, summingSalaries); 190 * }</pre> 191 * 192 * @see Stream#collect(Collector) 193 * @see Collectors 194 * 195 * @param <T> the type of input elements to the reduction operation 196 * @param <A> the mutable accumulation type of the reduction operation (often 197 * hidden as an implementation detail) 198 * @param <R> the result type of the reduction operation 199 * @since 1.8 200 */ 201 public interface Collector<T, A, R> { 202 /** 203 * A function that creates and returns a new mutable result container. 204 * 205 * @return a function which returns a new, mutable result container 206 */ supplier()207 Supplier<A> supplier(); 208 209 /** 210 * A function that folds a value into a mutable result container. 211 * 212 * @return a function which folds a value into a mutable result container 213 */ accumulator()214 BiConsumer<A, T> accumulator(); 215 216 /** 217 * A function that accepts two partial results and merges them. The 218 * combiner function may fold state from one argument into the other and 219 * return that, or may return a new result container. 220 * 221 * @return a function which combines two partial results into a combined 222 * result 223 */ combiner()224 BinaryOperator<A> combiner(); 225 226 /** 227 * Perform the final transformation from the intermediate accumulation type 228 * {@code A} to the final result type {@code R}. 229 * 230 * <p>If the characteristic {@code IDENTITY_FINISH} is 231 * set, this function may be presumed to be an identity transform with an 232 * unchecked cast from {@code A} to {@code R}. 233 * 234 * @return a function which transforms the intermediate result to the final 235 * result 236 */ finisher()237 Function<A, R> finisher(); 238 239 /** 240 * Returns a {@code Set} of {@code Collector.Characteristics} indicating 241 * the characteristics of this Collector. This set should be immutable. 242 * 243 * @return an immutable set of collector characteristics 244 */ characteristics()245 Set<Characteristics> characteristics(); 246 247 /** 248 * Returns a new {@code Collector} described by the given {@code supplier}, 249 * {@code accumulator}, and {@code combiner} functions. The resulting 250 * {@code Collector} has the {@code Collector.Characteristics.IDENTITY_FINISH} 251 * characteristic. 252 * 253 * @param supplier The supplier function for the new collector 254 * @param accumulator The accumulator function for the new collector 255 * @param combiner The combiner function for the new collector 256 * @param characteristics The collector characteristics for the new 257 * collector 258 * @param <T> The type of input elements for the new collector 259 * @param <R> The type of intermediate accumulation result, and final result, 260 * for the new collector 261 * @throws NullPointerException if any argument is null 262 * @return the new {@code Collector} 263 */ of(Supplier<R> supplier, BiConsumer<R, T> accumulator, BinaryOperator<R> combiner, Characteristics... characteristics)264 public static<T, R> Collector<T, R, R> of(Supplier<R> supplier, 265 BiConsumer<R, T> accumulator, 266 BinaryOperator<R> combiner, 267 Characteristics... characteristics) { 268 Objects.requireNonNull(supplier); 269 Objects.requireNonNull(accumulator); 270 Objects.requireNonNull(combiner); 271 Objects.requireNonNull(characteristics); 272 Set<Characteristics> cs = (characteristics.length == 0) 273 ? Collectors.CH_ID 274 : Collections.unmodifiableSet(EnumSet.of(Collector.Characteristics.IDENTITY_FINISH, 275 characteristics)); 276 return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, cs); 277 } 278 279 /** 280 * Returns a new {@code Collector} described by the given {@code supplier}, 281 * {@code accumulator}, {@code combiner}, and {@code finisher} functions. 282 * 283 * @param supplier The supplier function for the new collector 284 * @param accumulator The accumulator function for the new collector 285 * @param combiner The combiner function for the new collector 286 * @param finisher The finisher function for the new collector 287 * @param characteristics The collector characteristics for the new 288 * collector 289 * @param <T> The type of input elements for the new collector 290 * @param <A> The intermediate accumulation type of the new collector 291 * @param <R> The final result type of the new collector 292 * @throws NullPointerException if any argument is null 293 * @return the new {@code Collector} 294 */ of(Supplier<A> supplier, BiConsumer<A, T> accumulator, BinaryOperator<A> combiner, Function<A, R> finisher, Characteristics... characteristics)295 public static<T, A, R> Collector<T, A, R> of(Supplier<A> supplier, 296 BiConsumer<A, T> accumulator, 297 BinaryOperator<A> combiner, 298 Function<A, R> finisher, 299 Characteristics... characteristics) { 300 Objects.requireNonNull(supplier); 301 Objects.requireNonNull(accumulator); 302 Objects.requireNonNull(combiner); 303 Objects.requireNonNull(finisher); 304 Objects.requireNonNull(characteristics); 305 Set<Characteristics> cs = Collectors.CH_NOID; 306 if (characteristics.length > 0) { 307 cs = EnumSet.noneOf(Characteristics.class); 308 Collections.addAll(cs, characteristics); 309 cs = Collections.unmodifiableSet(cs); 310 } 311 return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, finisher, cs); 312 } 313 314 /** 315 * Characteristics indicating properties of a {@code Collector}, which can 316 * be used to optimize reduction implementations. 317 */ 318 enum Characteristics { 319 /** 320 * Indicates that this collector is <em>concurrent</em>, meaning that 321 * the result container can support the accumulator function being 322 * called concurrently with the same result container from multiple 323 * threads. 324 * 325 * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED}, 326 * then it should only be evaluated concurrently if applied to an 327 * unordered data source. 328 */ 329 CONCURRENT, 330 331 /** 332 * Indicates that the collection operation does not commit to preserving 333 * the encounter order of input elements. (This might be true if the 334 * result container has no intrinsic order, such as a {@link Set}.) 335 */ 336 UNORDERED, 337 338 /** 339 * Indicates that the finisher function is the identity function and 340 * can be elided. If set, it must be the case that an unchecked cast 341 * from A to R will succeed. 342 */ 343 IDENTITY_FINISH 344 } 345 } 346