26 |
|
* update a common sum that is used for purposes such as collecting |
27 |
|
* statistics. In this case, performance may be significantly faster |
28 |
|
* than using a shared {@link AtomicLong}, at the expense of using |
29 |
< |
* much more space. On the other hand, if it is known that only one |
30 |
< |
* thread can ever update the sum, performance may be significantly |
31 |
< |
* slower than just updating a local variable. |
29 |
> |
* more space. On the other hand, if it is known that only one thread |
30 |
> |
* can ever update the sum, performance may be significantly slower |
31 |
> |
* than just updating a local variable. |
32 |
|
* |
33 |
|
* <p>A StripedAdder may optionally be constructed with a given |
34 |
|
* expected contention level; i.e., the number of threads that are |
69 |
|
* find a free slot. |
70 |
|
* |
71 |
|
* By default, the table is lazily initialized. Upon first use, |
72 |
< |
* the table is set to size 2 (the minimum non-empty size), but |
73 |
< |
* containing only a single Adder. The maximum table size is |
74 |
< |
* bounded by nearest power of two >= the number of CPUS. The |
75 |
< |
* table size is capped because, when there are more threads than |
76 |
< |
* CPUs, supposing that each thread were bound to a CPU, there |
77 |
< |
* would exist a perfect hash function mapping threads to slots |
78 |
< |
* that eliminates collisions. When we reach capacity, we search |
79 |
< |
* for this mapping by randomly varying the hash codes of |
80 |
< |
* colliding threads. Because search is random, and failures only |
81 |
< |
* become known via CAS failures, convergence will be slow, and |
82 |
< |
* because threads are typically not bound to CPUS forever, may |
83 |
< |
* not occur at all. However, despite these limitations, observed |
84 |
< |
* contention is typically low in these cases. |
72 |
> |
* the table is set to size 1, and contains a single Adder. The |
73 |
> |
* maximum table size is bounded by nearest power of two >= the |
74 |
> |
* number of CPUS. The table size is capped because, when there |
75 |
> |
* are more threads than CPUs, supposing that each thread were |
76 |
> |
* bound to a CPU, there would exist a perfect hash function |
77 |
> |
* mapping threads to slots that eliminates collisions. When we |
78 |
> |
* reach capacity, we search for this mapping by randomly varying |
79 |
> |
* the hash codes of colliding threads. Because search is random, |
80 |
> |
* and failures only become known via CAS failures, convergence |
81 |
> |
* will be slow, and because threads are typically not bound to |
82 |
> |
* CPUS forever, may not occur at all. However, despite these |
83 |
> |
* limitations, observed contention is typically low in these |
84 |
> |
* cases. |
85 |
|
* |
86 |
|
* A single spinlock is used for resizing the table as well as |
87 |
|
* populating slots with new Adders. After initialization, there |
136 |
|
static final ThreadHashCode threadHashCode = new ThreadHashCode(); |
137 |
|
|
138 |
|
/** |
139 |
< |
* Table of adders. When non-null, size is a power of 2, at least 2. |
139 |
> |
* Table of adders. When non-null, size is a power of 2. |
140 |
|
*/ |
141 |
|
private transient volatile Adder[] adders; |
142 |
|
|
160 |
|
*/ |
161 |
|
public StripedAdder(int expectedContention) { |
162 |
|
int cap = (expectedContention < NCPU) ? expectedContention : NCPU; |
163 |
< |
int size = 2; |
163 |
> |
int size = 1; |
164 |
|
while (size < cap) |
165 |
|
size <<= 1; |
166 |
|
Adder[] as = new Adder[size]; |
178 |
|
Adder[] as; Adder a; int n; // locals to hold volatile reads |
179 |
|
HashCode hc = threadHashCode.get(); |
180 |
|
int h = hc.code; |
181 |
< |
boolean collide; |
181 |
> |
boolean contended; |
182 |
|
if ((as = adders) != null && (n = as.length) > 0 && |
183 |
|
(a = as[(n - 1) & h]) != null) { |
184 |
|
long v = a.value; |
185 |
|
if (UNSAFE.compareAndSwapLong(a, valueOffset, v, v + x)) |
186 |
|
return; |
187 |
< |
collide = true; |
187 |
> |
contended = true; |
188 |
|
} |
189 |
|
else |
190 |
< |
collide = false; |
191 |
< |
retryAdd(x, hc, collide); |
190 |
> |
contended = false; |
191 |
> |
retryAdd(x, hc, contended); |
192 |
|
} |
193 |
|
|
194 |
|
/** |
197 |
|
* explanation. This method suffers the usual non-modularity |
198 |
|
* problems of optimistic retry code, relying on rechecked sets of |
199 |
|
* reads. |
200 |
+ |
* |
201 |
+ |
* @param x the value to add |
202 |
+ |
* @param hc the hash code holder |
203 |
+ |
* @param precontended true if CAS failed before call |
204 |
|
*/ |
205 |
< |
private void retryAdd(long x, HashCode hc, boolean collide) { |
205 |
> |
private void retryAdd(long x, HashCode hc, boolean precontended) { |
206 |
|
int h = hc.code; |
207 |
+ |
boolean collide = false; // true if last slot nonempty |
208 |
|
for (;;) { |
209 |
|
Adder[] as; Adder a; int n; |
210 |
|
if ((as = adders) != null && (n = as.length) > 0) { |
211 |
< |
if ((a = as[(n - 1) & h]) != null) { |
212 |
< |
boolean shared = true; // Slot exists |
208 |
< |
if (collide && n < NCPU && busy == 0 && |
209 |
< |
UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1)) { |
210 |
< |
try { |
211 |
< |
if (adders == as) { // Expand table |
212 |
< |
Adder[] rs = new Adder[n << 1]; |
213 |
< |
for (int i = 0; i < n; ++i) |
214 |
< |
rs[i] = as[i]; |
215 |
< |
adders = rs; |
216 |
< |
shared = false; |
217 |
< |
} |
218 |
< |
} finally { |
219 |
< |
busy = 0; |
220 |
< |
} |
221 |
< |
if (shared || (h & n) != 0) { |
222 |
< |
collide = false; |
223 |
< |
continue; // Array or index changed |
224 |
< |
} |
225 |
< |
} |
226 |
< |
long v = a.value; |
227 |
< |
if (UNSAFE.compareAndSwapLong(a, valueOffset, v, v + x)) |
228 |
< |
break; |
229 |
< |
collide = shared; |
230 |
< |
} |
231 |
< |
else { // Try to attach new Adder |
232 |
< |
if (busy == 0 && |
211 |
> |
if ((a = as[(n - 1) & h]) == null) { |
212 |
> |
if (busy == 0 && // Try to attach new Adder |
213 |
|
UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1)) { |
214 |
|
boolean created = false; |
215 |
|
try { // Recheck under lock |
228 |
|
} |
229 |
|
collide = false; |
230 |
|
} |
231 |
+ |
else if (precontended) // CAS already known to fail |
232 |
+ |
precontended = false; // Continue after rehash |
233 |
+ |
else { |
234 |
+ |
long v = a.value; |
235 |
+ |
if (UNSAFE.compareAndSwapLong(a, valueOffset, v, v + x)) |
236 |
+ |
break; |
237 |
+ |
if (!collide) |
238 |
+ |
collide = true; |
239 |
+ |
else if (n >= NCPU || adders != as) |
240 |
+ |
collide = false; // Don't expand |
241 |
+ |
else if (busy == 0 && |
242 |
+ |
UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1)) { |
243 |
+ |
collide = false; |
244 |
+ |
try { |
245 |
+ |
if (adders == as) { // Expand table |
246 |
+ |
Adder[] rs = new Adder[n << 1]; |
247 |
+ |
for (int i = 0; i < n; ++i) |
248 |
+ |
rs[i] = as[i]; |
249 |
+ |
adders = rs; |
250 |
+ |
} |
251 |
+ |
} finally { |
252 |
+ |
busy = 0; |
253 |
+ |
} |
254 |
+ |
continue; |
255 |
+ |
} |
256 |
+ |
} |
257 |
|
h ^= h << 13; // Rehash |
258 |
|
h ^= h >>> 17; |
259 |
|
h ^= h << 5; |
260 |
|
} |
261 |
< |
else if (busy == 0) { // Default-initialize |
262 |
< |
Adder r = new Adder(x); |
263 |
< |
Adder[] rs = new Adder[2]; |
264 |
< |
rs[h & 1] = r; |
265 |
< |
if (adders == as && busy == 0 && |
266 |
< |
UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1)) { |
267 |
< |
boolean init = false; |
268 |
< |
try { |
269 |
< |
if (adders == as) { |
270 |
< |
adders = rs; |
271 |
< |
init = true; |
261 |
> |
else if (adders == as) { // Try to default-initialize |
262 |
> |
Adder[] rs = new Adder[1]; |
263 |
> |
rs[0] = new Adder(x); |
264 |
> |
boolean init = false; |
265 |
> |
while (adders == as) { |
266 |
> |
if (UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1)) { |
267 |
> |
try { |
268 |
> |
if (adders == as) { |
269 |
> |
adders = rs; |
270 |
> |
init = true; |
271 |
> |
} |
272 |
> |
} finally { |
273 |
> |
busy = 0; |
274 |
|
} |
275 |
< |
} finally { |
268 |
< |
busy = 0; |
275 |
> |
break; |
276 |
|
} |
277 |
< |
if (init) |
277 |
> |
if (adders != as) |
278 |
|
break; |
279 |
+ |
Thread.yield(); // Back off |
280 |
|
} |
281 |
+ |
if (init) |
282 |
+ |
break; |
283 |
|
} |
274 |
– |
else if (adders == as) // Lost initialization race |
275 |
– |
Thread.yield(); |
284 |
|
} |
285 |
|
hc.code = h; // Record index for next time |
286 |
|
} |