1 /*
2  * Written by Doug Lea with assistance from members of JCP JSR-166
3  * Expert Group and released to the public domain, as explained at
4  * http://creativecommons.org/publicdomain/zero/1.0/
5  */
6 
7 package java.util.concurrent.atomic;
8 
9 import java.util.Arrays;
10 import java.util.concurrent.ThreadLocalRandom;
11 import java.util.function.DoubleBinaryOperator;
12 import java.util.function.LongBinaryOperator;
13 
14 /**
15  * A package-local class holding common representation and mechanics
16  * for classes supporting dynamic striping on 64bit values. The class
17  * extends Number so that concrete subclasses must publicly do so.
18  */
19 @SuppressWarnings("serial")
20 abstract class Striped64 extends Number {
21     /*
22      * This class maintains a lazily-initialized table of atomically
23      * updated variables, plus an extra "base" field. The table size
24      * is a power of two. Indexing uses masked per-thread hash codes.
25      * Nearly all declarations in this class are package-private,
26      * accessed directly by subclasses.
27      *
28      * Table entries are of class Cell; a variant of AtomicLong padded
29      * (via @Contended) to reduce cache contention. Padding is
30      * overkill for most Atomics because they are usually irregularly
31      * scattered in memory and thus don't interfere much with each
32      * other. But Atomic objects residing in arrays will tend to be
33      * placed adjacent to each other, and so will most often share
34      * cache lines (with a huge negative performance impact) without
35      * this precaution.
36      *
37      * In part because Cells are relatively large, we avoid creating
38      * them until they are needed.  When there is no contention, all
39      * updates are made to the base field.  Upon first contention (a
40      * failed CAS on base update), the table is initialized to size 2.
41      * The table size is doubled upon further contention until
42      * reaching the nearest power of two greater than or equal to the
43      * number of CPUS. Table slots remain empty (null) until they are
44      * needed.
45      *
46      * A single spinlock ("cellsBusy") is used for initializing and
47      * resizing the table, as well as populating slots with new Cells.
48      * There is no need for a blocking lock; when the lock is not
49      * available, threads try other slots (or the base).  During these
50      * retries, there is increased contention and reduced locality,
51      * which is still better than alternatives.
52      *
53      * The Thread probe fields maintained via ThreadLocalRandom serve
54      * as per-thread hash codes. We let them remain uninitialized as
55      * zero (if they come in this way) until they contend at slot
56      * 0. They are then initialized to values that typically do not
57      * often conflict with others.  Contention and/or table collisions
58      * are indicated by failed CASes when performing an update
59      * operation. Upon a collision, if the table size is less than
60      * the capacity, it is doubled in size unless some other thread
61      * holds the lock. If a hashed slot is empty, and lock is
62      * available, a new Cell is created. Otherwise, if the slot
63      * exists, a CAS is tried.  Retries proceed by "double hashing",
64      * using a secondary hash (Marsaglia XorShift) to try to find a
65      * free slot.
66      *
67      * The table size is capped because, when there are more threads
68      * than CPUs, supposing that each thread were bound to a CPU,
69      * there would exist a perfect hash function mapping threads to
70      * slots that eliminates collisions. When we reach capacity, we
71      * search for this mapping by randomly varying the hash codes of
72      * colliding threads.  Because search is random, and collisions
73      * only become known via CAS failures, convergence can be slow,
74      * and because threads are typically not bound to CPUS forever,
75      * may not occur at all. However, despite these limitations,
76      * observed contention rates are typically low in these cases.
77      *
78      * It is possible for a Cell to become unused when threads that
79      * once hashed to it terminate, as well as in the case where
80      * doubling the table causes no thread to hash to it under
81      * expanded mask.  We do not try to detect or remove such cells,
82      * under the assumption that for long-running instances, observed
83      * contention levels will recur, so the cells will eventually be
84      * needed again; and for short-lived ones, it does not matter.
85      */
86 
87     /**
88      * Padded variant of AtomicLong supporting only raw accesses plus CAS.
89      *
90      * JVM intrinsics note: It would be possible to use a release-only
91      * form of CAS here, if it were provided.
92      */
93     // @jdk.internal.vm.annotation.Contended // android-removed
94     static final class Cell {
95         volatile long value;
Cell(long x)96         Cell(long x) { value = x; }
cas(long cmp, long val)97         final boolean cas(long cmp, long val) {
98             return U.compareAndSwapLong(this, VALUE, cmp, val);
99         }
reset()100         final void reset() {
101             U.putLongVolatile(this, VALUE, 0L);
102         }
reset(long identity)103         final void reset(long identity) {
104             U.putLongVolatile(this, VALUE, identity);
105         }
106 
107         // Unsafe mechanics
108         private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
109         private static final long VALUE;
110         static {
111             try {
112                 VALUE = U.objectFieldOffset
113                     (Cell.class.getDeclaredField("value"));
114             } catch (ReflectiveOperationException e) {
115                 throw new Error(e);
116             }
117         }
118     }
119 
120     /** Number of CPUS, to place bound on table size */
121     static final int NCPU = Runtime.getRuntime().availableProcessors();
122 
123     /**
124      * Table of cells. When non-null, size is a power of 2.
125      */
126     transient volatile Cell[] cells;
127 
128     /**
129      * Base value, used mainly when there is no contention, but also as
130      * a fallback during table initialization races. Updated via CAS.
131      */
132     transient volatile long base;
133 
134     /**
135      * Spinlock (locked via CAS) used when resizing and/or creating Cells.
136      */
137     transient volatile int cellsBusy;
138 
139     /**
140      * Package-private default constructor.
141      */
Striped64()142     Striped64() {
143     }
144 
145     /**
146      * CASes the base field.
147      */
casBase(long cmp, long val)148     final boolean casBase(long cmp, long val) {
149         return U.compareAndSwapLong(this, BASE, cmp, val);
150     }
151 
152     /**
153      * CASes the cellsBusy field from 0 to 1 to acquire lock.
154      */
casCellsBusy()155     final boolean casCellsBusy() {
156         return U.compareAndSwapInt(this, CELLSBUSY, 0, 1);
157     }
158 
159     /**
160      * Returns the probe value for the current thread.
161      * Duplicated from ThreadLocalRandom because of packaging restrictions.
162      */
getProbe()163     static final int getProbe() {
164         return U.getInt(Thread.currentThread(), PROBE);
165     }
166 
167     /**
168      * Pseudo-randomly advances and records the given probe value for the
169      * given thread.
170      * Duplicated from ThreadLocalRandom because of packaging restrictions.
171      */
advanceProbe(int probe)172     static final int advanceProbe(int probe) {
173         probe ^= probe << 13;   // xorshift
174         probe ^= probe >>> 17;
175         probe ^= probe << 5;
176         U.putInt(Thread.currentThread(), PROBE, probe);
177         return probe;
178     }
179 
180     /**
181      * Handles cases of updates involving initialization, resizing,
182      * creating new Cells, and/or contention. See above for
183      * explanation. This method suffers the usual non-modularity
184      * problems of optimistic retry code, relying on rechecked sets of
185      * reads.
186      *
187      * @param x the value
188      * @param fn the update function, or null for add (this convention
189      * avoids the need for an extra field or function in LongAdder).
190      * @param wasUncontended false if CAS failed before call
191      */
longAccumulate(long x, LongBinaryOperator fn, boolean wasUncontended)192     final void longAccumulate(long x, LongBinaryOperator fn,
193                               boolean wasUncontended) {
194         int h;
195         if ((h = getProbe()) == 0) {
196             ThreadLocalRandom.current(); // force initialization
197             h = getProbe();
198             wasUncontended = true;
199         }
200         boolean collide = false;                // True if last slot nonempty
201         done: for (;;) {
202             Cell[] as; Cell a; int n; long v;
203             if ((as = cells) != null && (n = as.length) > 0) {
204                 if ((a = as[(n - 1) & h]) == null) {
205                     if (cellsBusy == 0) {       // Try to attach new Cell
206                         Cell r = new Cell(x);   // Optimistically create
207                         if (cellsBusy == 0 && casCellsBusy()) {
208                             try {               // Recheck under lock
209                                 Cell[] rs; int m, j;
210                                 if ((rs = cells) != null &&
211                                     (m = rs.length) > 0 &&
212                                     rs[j = (m - 1) & h] == null) {
213                                     rs[j] = r;
214                                     break done;
215                                 }
216                             } finally {
217                                 cellsBusy = 0;
218                             }
219                             continue;           // Slot is now non-empty
220                         }
221                     }
222                     collide = false;
223                 }
224                 else if (!wasUncontended)       // CAS already known to fail
225                     wasUncontended = true;      // Continue after rehash
226                 else if (a.cas(v = a.value,
227                                (fn == null) ? v + x : fn.applyAsLong(v, x)))
228                     break;
229                 else if (n >= NCPU || cells != as)
230                     collide = false;            // At max size or stale
231                 else if (!collide)
232                     collide = true;
233                 else if (cellsBusy == 0 && casCellsBusy()) {
234                     try {
235                         if (cells == as)        // Expand table unless stale
236                             cells = Arrays.copyOf(as, n << 1);
237                     } finally {
238                         cellsBusy = 0;
239                     }
240                     collide = false;
241                     continue;                   // Retry with expanded table
242                 }
243                 h = advanceProbe(h);
244             }
245             else if (cellsBusy == 0 && cells == as && casCellsBusy()) {
246                 try {                           // Initialize table
247                     if (cells == as) {
248                         Cell[] rs = new Cell[2];
249                         rs[h & 1] = new Cell(x);
250                         cells = rs;
251                         break done;
252                     }
253                 } finally {
254                     cellsBusy = 0;
255                 }
256             }
257             // Fall back on using base
258             else if (casBase(v = base,
259                              (fn == null) ? v + x : fn.applyAsLong(v, x)))
260                 break done;
261         }
262     }
263 
apply(DoubleBinaryOperator fn, long v, double x)264     private static long apply(DoubleBinaryOperator fn, long v, double x) {
265         double d = Double.longBitsToDouble(v);
266         d = (fn == null) ? d + x : fn.applyAsDouble(d, x);
267         return Double.doubleToRawLongBits(d);
268     }
269 
270     /**
271      * Same as longAccumulate, but injecting long/double conversions
272      * in too many places to sensibly merge with long version, given
273      * the low-overhead requirements of this class. So must instead be
274      * maintained by copy/paste/adapt.
275      */
doubleAccumulate(double x, DoubleBinaryOperator fn, boolean wasUncontended)276     final void doubleAccumulate(double x, DoubleBinaryOperator fn,
277                                 boolean wasUncontended) {
278         int h;
279         if ((h = getProbe()) == 0) {
280             ThreadLocalRandom.current(); // force initialization
281             h = getProbe();
282             wasUncontended = true;
283         }
284         boolean collide = false;                // True if last slot nonempty
285         done: for (;;) {
286             Cell[] as; Cell a; int n; long v;
287             if ((as = cells) != null && (n = as.length) > 0) {
288                 if ((a = as[(n - 1) & h]) == null) {
289                     if (cellsBusy == 0) {       // Try to attach new Cell
290                         Cell r = new Cell(Double.doubleToRawLongBits(x));
291                         if (cellsBusy == 0 && casCellsBusy()) {
292                             try {               // Recheck under lock
293                                 Cell[] rs; int m, j;
294                                 if ((rs = cells) != null &&
295                                     (m = rs.length) > 0 &&
296                                     rs[j = (m - 1) & h] == null) {
297                                     rs[j] = r;
298                                     break done;
299                                 }
300                             } finally {
301                                 cellsBusy = 0;
302                             }
303                             continue;           // Slot is now non-empty
304                         }
305                     }
306                     collide = false;
307                 }
308                 else if (!wasUncontended)       // CAS already known to fail
309                     wasUncontended = true;      // Continue after rehash
310                 else if (a.cas(v = a.value, apply(fn, v, x)))
311                     break;
312                 else if (n >= NCPU || cells != as)
313                     collide = false;            // At max size or stale
314                 else if (!collide)
315                     collide = true;
316                 else if (cellsBusy == 0 && casCellsBusy()) {
317                     try {
318                         if (cells == as)        // Expand table unless stale
319                             cells = Arrays.copyOf(as, n << 1);
320                     } finally {
321                         cellsBusy = 0;
322                     }
323                     collide = false;
324                     continue;                   // Retry with expanded table
325                 }
326                 h = advanceProbe(h);
327             }
328             else if (cellsBusy == 0 && cells == as && casCellsBusy()) {
329                 try {                           // Initialize table
330                     if (cells == as) {
331                         Cell[] rs = new Cell[2];
332                         rs[h & 1] = new Cell(Double.doubleToRawLongBits(x));
333                         cells = rs;
334                         break done;
335                     }
336                 } finally {
337                     cellsBusy = 0;
338                 }
339             }
340             // Fall back on using base
341             else if (casBase(v = base, apply(fn, v, x)))
342                 break done;
343         }
344     }
345 
346     // Unsafe mechanics
347     private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
348     private static final long BASE;
349     private static final long CELLSBUSY;
350     private static final long PROBE;
351     static {
352         try {
353             BASE = U.objectFieldOffset
354                 (Striped64.class.getDeclaredField("base"));
355             CELLSBUSY = U.objectFieldOffset
356                 (Striped64.class.getDeclaredField("cellsBusy"));
357 
358             PROBE = U.objectFieldOffset
359                 (Thread.class.getDeclaredField("threadLocalRandomProbe"));
360         } catch (ReflectiveOperationException e) {
361             throw new Error(e);
362         }
363     }
364 
365 }
366