[ViewVC] Diff of: jsr166/jsr166/src/jsr166e/StripedAdder.java

Comparing jsr166/src/jsr166e/StripedAdder.java (file contents):
Revision 1.4 by dl, Sat Jul 23 16:32:53 2011 UTC vs.
Revision 1.7 by dl, Tue Jul 26 18:30:35 2011 UTC

#	Line 44 \| Line 44 \| public class StripedAdder implements Ser
44
45		/*
46		* A StripedAdder maintains a table of Atomic long variables. The
47	<	* table is indexed by per-thread hash codes that are initialized
48	<	* to random values.
47	>	* table is indexed by per-thread hash codes.
48		*
49	<	* The table doubles in size upon contention (as indicated by
50	<	* failed CASes when performing add()), but is capped at the
51	<	* nearest power of two >= #CPUS. This reflects the idea that,
52	<	* when there are more threads than CPUs, then if each thread were
53	<	* bound to a CPU, there would exist a perfect hash function
54	<	* mapping threads to slots that eliminates collisions. When we
55	<	* reach capacity, we search for this mapping by randomly varying
56	<	* the hash codes of colliding threads. Because search is random,
57	<	* and failures only become known via CAS failures, convergence
58	<	* will be slow, and because threads are typically not bound to
59	<	* CPUS forever, may not occur at all. However, despite these
60	<	* limitations, observed contention is typically low in these
61	<	* cases.
49	>	* By default, the table is lazily initialized, to minimize
50	>	* footprint until adders are used. On first use, the table is set
51	>	* to size DEFAULT_INITIAL_SIZE (currently 8). Table size is
52	>	* bounded by the number of CPUS (if larger than the default
53	>	* size).
54	>	*
55	>	* Per-thread hash codes are initialized to random values.
56	>	* Collisions are indicated by failed CASes when performing an add
57	>	* operation (see method retryAdd). Upon a collision, if the table
58	>	* size is less than the capacity, it is doubled in size unless
59	>	* some other thread holds the lock. If a hashed slot is empty, and
60	>	* the lock is available, a new Adder is created. Otherwise, if the
61	>	* slot exists, a CAS is tried. Retries proceed by "double
62	>	* hashing", using a secondary hash (Marsaglia XorShift) to try to
63	>	* find a free slot.
64	>	*
65	>	* The table size is capped because, when there are more threads
66	>	* than CPUs, supposing that each thread were bound to a CPU,
67	>	* there would exist a perfect hash function mapping threads to
68	>	* slots that eliminates collisions. When we reach capacity, we
69	>	* search for this mapping by randomly varying the hash codes of
70	>	* colliding threads. Because search is random, and failures only
71	>	* become known via CAS failures, convergence will be slow, and
72	>	* because threads are typically not bound to CPUS forever, may
73	>	* not occur at all. However, despite these limitations, observed
74	>	* contention is typically low in these cases.
75		*
76		* Table entries are of class Adder; a form of AtomicLong padded
77		* to reduce cache contention on most processors. Padding is
78	<	* overkill for most Atomics because they are most often
79	<	* irregularly scattered in memory and thus don't interfere much
80	<	* with each other. But Atomic objects residing in arrays will
81	<	* tend to be placed adjacent to each other, and so will most
82	<	* often share cache lines without this precaution. Adders are
78	>	* overkill for most Atomics because they are usually irregularly
79	>	* scattered in memory and thus don't interfere much with each
80	>	* other. But Atomic objects residing in arrays will tend to be
81	>	* placed adjacent to each other, and so will most often share
82	>	* cache lines without this precaution. Adders are by default
83		* constructed upon first use, which further improves per-thread
84	<	* locality and helps reduce (an already large) footprint.
84	>	* locality and helps reduce footprint.
85		*
86		* A single spinlock is used for resizing the table as well as
87		* populating slots with new Adders. Upon lock contention, threads
88		* try other slots rather than blocking. After initialization, at
89		* least one slot exists, so retries will eventually find a
90	<	* candidate Adder. During these retries, there is increased
90	>	* candidate Adder. During these retries, there is increased
91		* contention and reduced locality, which is still better than
92		* alternatives.
93		*/
94
95		/**
84	–	* Number of processors, to place a cap on table growth.
85	–	*/
86	–	static final int NCPU = Runtime.getRuntime().availableProcessors();
87	–
88	–	/**
96		* Padded version of AtomicLong
97		*/
98		static final class Adder extends AtomicLong {
99	<	long p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pa, pb, pc, pd;
99	>	long p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pa, pb, pc, pd, pe;
100		Adder(long x) { super(x); }
101		}
102
103	+	private static final int NCPU = Runtime.getRuntime().availableProcessors();
104	+
105		/**
106	<	* Holder for the thread-local hash code. The code starts off with
107	<	* a given random value, but may be set to a different value upon
108	<	* collisions in retryAdd.
106	>	* Table bounds. DEFAULT_INITIAL_SIZE is the table size set upon
107	>	* first use under default constructor, and must be a power of
108	>	* two. There is not much point in making size a lot smaller than
109	>	* that of Adders though. CAP is the maximum allowed table size.
110	>	*/
111	>	private static final int DEFAULT_INITIAL_SIZE = 8;
112	>	private static final int CAP = Math.max(NCPU, DEFAULT_INITIAL_SIZE);
113	>
114	>	/**
115	>	* Holder for the thread-local hash code. The code is initially
116	>	* random, but may be set to a different value upon collisions.
117		*/
118		static final class HashCode {
119	+	static final Random rng = new Random();
120		int code;
121	<	HashCode(int h) { code = h; }
121	>	HashCode() {
122	>	int h = rng.nextInt();
123	>	code = (h == 0) ? 1 : h; // ensure nonzero
124	>	}
125		}
126
127		/**
128		* The corresponding ThreadLocal class
129		*/
130		static final class ThreadHashCode extends ThreadLocal<HashCode> {
131	<	static final Random rng = new Random();
111	<	public HashCode initialValue() {
112	<	int h = rng.nextInt();
113	<	return new HashCode((h == 0) ? 1 : h); // ensure nonzero
114	<	}
131	>	public HashCode initialValue() { return new HashCode(); }
132		}
133
134		/**
135		* Static per-thread hash codes. Shared across all StripedAdders
136	<	* because adjustments due to collisions in one table are likely
137	<	* to be appropriate for others.
136	>	* to reduce ThreadLocal pollution and because adjustments due to
137	>	* collisions in one table are likely to be appropriate for
138	>	* others.
139		*/
140		static final ThreadHashCode threadHashCode = new ThreadHashCode();
141
142		/**
143	<	* Table of adders. Minimum size 2. Size grows to be at most NCPU.
143	>	* Common placeholder for empty arrays.
144	>	*/
145	>	static final Adder[] EMPTY_ARRAY = new Adder[0];
146	>
147	>	/**
148	>	* Table of adders. Size is either zero or a power of two, grows
149	>	* to be at most CAP.
150		*/
151		private transient volatile Adder[] adders;
152
153		/**
154		* Serves as a lock when resizing and/or creating Adders. There
155	<	* is no need for a blocking lock: When busy, other threads try
156	<	* other slots.
155	>	* is no need for a blocking lock: Except during initialization
156	>	* races, when busy, other threads try other slots. However,
157	>	* during (double-checked) initializations, we use the
158	>	* "synchronized" lock on this object.
159		*/
160		private final AtomicInteger mutex;
161
#	Line 137 \| Line 163 \| public class StripedAdder implements Ser
163		* Creates a new adder with zero sum.
164		*/
165		public StripedAdder() {
166	+	this.adders = EMPTY_ARRAY;
167		this.mutex = new AtomicInteger();
168		// remaining initialization on first call to add.
169		}
#	Line 149 \| Line 176 \| public class StripedAdder implements Ser
176		* will concurrently update the sum.
177		*/
178		public StripedAdder(int expectedContention) {
179	<	int cap = (expectedContention < NCPU) ? expectedContention : NCPU;
180	<	int size = 2;
181	<	while (size < cap)
182	<	size <<= 1;
183	<	Adder[] as = new Adder[size];
184	<	for (int i = 0; i < size; ++i)
185	<	as[i] = new Adder(0);
186	<	this.adders = as;
179	>	if (expectedContention > 0) {
180	>	int cap = (expectedContention < CAP) ? expectedContention : CAP;
181	>	int size = 1;
182	>	while (size < cap)
183	>	size <<= 1;
184	>	Adder[] as = new Adder[size];
185	>	for (int i = 0; i < size; ++i)
186	>	as[i] = new Adder(0);
187	>	this.adders = as;
188	>	}
189	>	else
190	>	this.adders = EMPTY_ARRAY;
191		this.mutex = new AtomicInteger();
192		}
193
#	Line 168 \| Line 199 \| public class StripedAdder implements Ser
199		public void add(long x) {
200		Adder[] as; Adder a; int n; long v; // locals to hold volatile reads
201		HashCode hc = threadHashCode.get();
202	+	int h = hc.code;
203		if ((as = adders) == null \|\| (n = as.length) < 1 \|\|
204	<	(a = as[hc.code & (n - 1)]) == null \|\|
204	>	(a = as[(n - 1) & h]) == null \|\|
205		!a.compareAndSet(v = a.get(), v + x))
206		retryAdd(x, hc);
207		}
208
209		/**
210		* Handle cases of add involving initialization, resizing,
211	<	* creating new Adders, and/or contention.
211	>	* creating new Adders, and/or contention. See above for
212	>	* explanation.
213		*/
214		private void retryAdd(long x, HashCode hc) {
215		int h = hc.code;
216		final AtomicInteger mutex = this.mutex;
217	<	AtomicInteger lock = null; // nonnull when held
218	<	try {
219	<	for (;;) {
220	<	Adder[] as; Adder a; long v; int n, k; // locals for volatiles
221	<	boolean needLock = true;
222	<	if ((as = adders) == null \|\| (n = as.length) < 1) {
223	<	if (lock != null) // default-initialize
224	<	adders = new Adder[2];
217	>	int collisions = 1 - mutex.get(); // first guess: collides if not locked
218	>	for (;;) {
219	>	Adder[] as; Adder a; long v; int k, n;
220	>	while ((as = adders) == null \|\| (n = as.length) < 1) {
221	>	synchronized(mutex) { // Try to initialize
222	>	if (adders == as) {
223	>	Adder[] rs = new Adder[DEFAULT_INITIAL_SIZE];
224	>	rs[h & (DEFAULT_INITIAL_SIZE - 1)] = new Adder(0);
225	>	adders = rs;
226	>	}
227		}
228	<	else if ((a = as[k = h & (n - 1)]) == null) {
229	<	if (lock != null) { // attach new adder
230	<	as[k] = new Adder(x);
231	<	break;
228	>	collisions = 0;
229	>	}
230	>
231	>	if ((a = as[k = (n - 1) & h]) == null) { // Try to add slot
232	>	if (mutex.get() == 0 && mutex.compareAndSet(0, 1)) {
233	>	try {
234	>	if (adders == as && as[k] == null)
235	>	a = as[k] = new Adder(x);
236	>	} finally {
237	>	mutex.set(0);
238		}
239	+	if (a != null)
240	+	break;
241		}
242	<	else if (a.compareAndSet(v = a.get(), v + x))
243	<	break;
244	<	else if (n >= NCPU) // cannot expand
245	<	needLock = false;
246	<	else if (lock != null) // expand table
247	<	adders = Arrays.copyOf(as, n << 1);
248	<
249	<	if (lock == null) {
250	<	if (needLock && mutex.get() == 0 &&
251	<	mutex.compareAndSet(0, 1))
209	<	lock = mutex;
210	<	else { // try elsewhere
211	<	h ^= h << 13; // Marsaglia XorShift
212	<	h ^= h >>> 17;
213	<	h ^= h << 5;
242	>	collisions = 0;
243	>	}
244	>	else if (collisions != 0 && n < CAP && // Try to expand table
245	>	mutex.get() == 0 && mutex.compareAndSet(0, 1)) {
246	>	try {
247	>	if (adders == as) {
248	>	Adder[] rs = new Adder[n << 1];
249	>	for (int i = 0; i < n; ++i)
250	>	rs[i] = as[i];
251	>	adders = rs;
252		}
253	+	} finally {
254	+	mutex.set(0);
255		}
256	+	collisions = 0;
257		}
258	<	} finally {
259	<	if (lock != null)
260	<	lock.set(0);
258	>	else if (a.compareAndSet(v = a.get(), v + x))
259	>	break;
260	>	else
261	>	collisions = 1;
262	>	h ^= h << 13; // Rehash
263	>	h ^= h >>> 17;
264	>	h ^= h << 5;
265		}
266	<	if (hc.code != h) // avoid unneeded writes
222	<	hc.code = h;
266	>	hc.code = h;
267		}
268
269		/**

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines (old revision)
>	Changed lines (new revision)

Comparing jsr166/src/jsr166e/StripedAdder.java (file contents): Revision 1.4 by dl, Sat Jul 23 16:32:53 2011 UTC vs. Revision 1.7 by dl, Tue Jul 26 18:30:35 2011 UTC

Diff Legend

Comparing jsr166/src/jsr166e/StripedAdder.java (file contents):
Revision 1.4 by dl, Sat Jul 23 16:32:53 2011 UTC vs.
Revision 1.7 by dl, Tue Jul 26 18:30:35 2011 UTC