/*
 * Written by Doug Lea, Bill Scherer, and Michael Scott with
 * assistance from members of JCP JSR-166 Expert Group and released to
 * the public domain, as explained at
 * http://creativecommons.org/publicdomain/zero/1.0/
 */
7 |
|
|
|
8 |
tim |
1.1 |
package java.util.concurrent; |
9 |
jsr166 |
1.54 |
import java.util.concurrent.atomic.AtomicInteger; |
10 |
|
|
import java.util.concurrent.atomic.AtomicReference; |
11 |
dl |
1.38 |
import java.util.concurrent.locks.LockSupport; |
12 |
tim |
1.1 |
|
13 |
|
|
/**
 * A synchronization point at which threads can pair and swap elements
 * within pairs. Each thread presents some object on entry to the
 * {@link #exchange exchange} method, matches with a partner thread,
 * and receives its partner's object on return. An Exchanger may be
 * viewed as a bidirectional form of a {@link SynchronousQueue}.
 * Exchangers may be useful in applications such as genetic algorithms
 * and pipeline designs.
 *
 * <p><b>Sample Usage:</b>
 * Here are the highlights of a class that uses an {@code Exchanger}
 * to swap buffers between threads so that the thread filling the
 * buffer gets a freshly emptied one when it needs it, handing off the
 * filled one to the thread emptying the buffer.
 *  <pre> {@code
 * class FillAndEmpty {
 *   Exchanger<DataBuffer> exchanger = new Exchanger<DataBuffer>();
 *   DataBuffer initialEmptyBuffer = ... a made-up type
 *   DataBuffer initialFullBuffer = ...
 *
 *   class FillingLoop implements Runnable {
 *     public void run() {
 *       DataBuffer currentBuffer = initialEmptyBuffer;
 *       try {
 *         while (currentBuffer != null) {
 *           addToBuffer(currentBuffer);
 *           if (currentBuffer.isFull())
 *             currentBuffer = exchanger.exchange(currentBuffer);
 *         }
 *       } catch (InterruptedException ex) { ... handle ... }
 *     }
 *   }
 *
 *   class EmptyingLoop implements Runnable {
 *     public void run() {
 *       DataBuffer currentBuffer = initialFullBuffer;
 *       try {
 *         while (currentBuffer != null) {
 *           takeFromBuffer(currentBuffer);
 *           if (currentBuffer.isEmpty())
 *             currentBuffer = exchanger.exchange(currentBuffer);
 *         }
 *       } catch (InterruptedException ex) { ... handle ...}
 *     }
 *   }
 *
 *   void start() {
 *     new Thread(new FillingLoop()).start();
 *     new Thread(new EmptyingLoop()).start();
 *   }
 * }}</pre>
 *
 * <p>Memory consistency effects: For each pair of threads that
 * successfully exchange objects via an {@code Exchanger}, actions
 * prior to the {@code exchange()} in each thread
 * <a href="package-summary.html#MemoryVisibility"><i>happen-before</i></a>
 * those subsequent to a return from the corresponding {@code exchange()}
 * in the other thread.
 *
 * @since 1.5
 * @author Doug Lea and Bill Scherer and Michael Scott
 * @param <V> The type of objects that may be exchanged
 */
public class Exchanger<V> {
    /*
     * Algorithm Description:
     *
     * The basic idea is to maintain a "slot", which is a reference to
     * a Node containing both an Item to offer and a "hole" waiting to
     * get filled in. If an incoming "occupying" thread sees that the
     * slot is null, it CAS'es (compareAndSets) a Node there and waits
     * for another to invoke exchange. That second "fulfilling" thread
     * sees that the slot is non-null, and so CASes it back to null,
     * also exchanging items by CASing the hole, plus waking up the
     * occupying thread if it is blocked. In each case CAS'es may
     * fail because a slot at first appears non-null but is null upon
     * CAS, or vice-versa. So threads may need to retry these
     * actions.
     *
     * This simple approach works great when there are only a few
     * threads using an Exchanger, but performance rapidly
     * deteriorates due to CAS contention on the single slot when
     * there are lots of threads using an exchanger. So instead we use
     * an "arena"; basically a kind of hash table with a dynamically
     * varying number of slots, any one of which can be used by
     * threads performing an exchange. Incoming threads pick slots
     * based on a hash of their Thread ids. If an incoming thread
     * fails to CAS in its chosen slot, it picks an alternative slot
     * instead. And similarly from there. If a thread successfully
     * CASes into a slot but no other thread arrives, it tries
     * another, heading toward the zero slot, which always exists even
     * if the table shrinks. The particular mechanics controlling this
     * are as follows:
     *
     * Waiting: Slot zero is special in that it is the only slot that
     * exists when there is no contention. A thread occupying slot
     * zero will block if no thread fulfills it after a short spin.
     * In other cases, occupying threads eventually give up and try
     * another slot. Waiting threads spin for a while (a period that
     * should be a little less than a typical context-switch time)
     * before either blocking (if slot zero) or giving up (if other
     * slots) and restarting. There is no reason for threads to block
     * unless there are unlikely to be any other threads present.
     * Occupants are mainly avoiding memory contention so sit there
     * quietly polling for a shorter period than it would take to
     * block and then unblock them. Non-slot-zero waits that elapse
     * because of lack of other threads waste around one extra
     * context-switch time per try, which is still on average much
     * faster than alternative approaches.
     *
     * Sizing: Usually, using only a few slots suffices to reduce
     * contention. Especially with small numbers of threads, using
     * too many slots can lead to just as poor performance as using
     * too few of them, and there's not much room for error. The
     * variable "max" maintains the number of slots actually in
     * use. It is increased when a thread sees too many CAS
     * failures. (This is analogous to resizing a regular hash table
     * based on a target load factor, except here, growth steps are
     * just one-by-one rather than proportional.) Growth requires
     * contention failures in each of three tried slots. Requiring
     * multiple failures for expansion copes with the fact that some
     * failed CASes are not due to contention but instead to simple
     * races between two threads or thread pre-emptions occurring
     * between reading and CASing. Also, very transient peak
     * contention can be much higher than the average sustainable
     * levels. An attempt to decrease the max limit is usually made
     * when a non-slot-zero wait elapses without being fulfilled.
     * Threads experiencing elapsed waits move closer to zero, so
     * eventually find existing (or future) threads even if the table
     * has been shrunk due to inactivity. The chosen mechanics and
     * thresholds for growing and shrinking are intrinsically
     * entangled with indexing and hashing inside the exchange code,
     * and can't be nicely abstracted out.
     *
     * Hashing: Each thread picks its initial slot to use in accord
     * with a simple hashcode. The sequence is the same on each
     * encounter by any given thread, but effectively random across
     * threads. Using arenas encounters the classic cost vs quality
     * tradeoffs of all hash tables. Here, we use a one-step FNV-1a
     * hash code based on the current thread's Thread.getId(), along
     * with a cheap approximation to a mod operation to select an
     * index. The downside of optimizing index selection in this way
     * is that the code is hardwired to use a maximum table size of
     * 32. But this value more than suffices for known platforms and
     * applications.
     *
     * Probing: On sensed contention of a selected slot, we probe
     * sequentially through the table, analogously to linear probing
     * after collision in a hash table. (We move circularly, in
     * reverse order, to mesh best with table growth and shrinkage
     * rules.) Except that to minimize the effects of false-alarms
     * and cache thrashing, we try the first selected slot twice
     * before moving.
     *
     * Padding: Even with contention management, slots are heavily
     * contended, so use cache-padding to avoid poor memory
     * performance. Because of this, slots are lazily constructed
     * only when used, to avoid wasting this space unnecessarily.
     * While isolation of locations is not much of an issue at first
     * in an application, as time goes on and garbage-collectors
     * perform compaction, slots are very likely to be moved adjacent
     * to each other, which can cause much thrashing of cache lines on
     * MPs unless padding is employed.
     *
     * This is an improvement of the algorithm described in the paper
     * "A Scalable Elimination-based Exchange Channel" by William
     * Scherer, Doug Lea, and Michael Scott in Proceedings of SCOOL05
     * workshop. Available at: http://hdl.handle.net/1802/2104
     */

    /** The number of CPUs, for sizing and spin control */
    private static final int NCPU = Runtime.getRuntime().availableProcessors();

    /**
     * The capacity of the arena. Set to a value that provides more
     * than enough space to handle contention. On small machines
     * most slots won't be used, but it is still not wasted because
     * the extra space provides some machine-level address padding
     * to minimize interference with heavily CAS'ed Slot locations.
     * And on very large machines, performance eventually becomes
     * bounded by memory bandwidth, not numbers of threads/CPUs.
     * This constant cannot be changed without also modifying
     * indexing and hashing algorithms.
     */
    private static final int CAPACITY = 32;

    /**
     * The value of "max" that will hold all threads without
     * contention. When this value is less than CAPACITY, some
     * otherwise wasted expansion can be avoided.
     */
    private static final int FULL =
        Math.max(0, Math.min(CAPACITY, NCPU / 2) - 1);

    /**
     * The number of times to spin (doing nothing except polling a
     * memory location) before blocking or giving up while waiting to
     * be fulfilled. Should be zero on uniprocessors. On
     * multiprocessors, this value should be large enough so that two
     * threads exchanging items as fast as possible block only when
     * one of them is stalled (due to GC or preemption), but not much
     * longer, to avoid wasting CPU resources. Seen differently, this
     * value is a little over half the number of cycles of an average
     * context switch time on most systems. The value here is
     * approximately the average of those across a range of tested
     * systems.
     */
    private static final int SPINS = (NCPU == 1) ? 0 : 2000;

    /**
     * The number of times to spin before blocking in timed waits.
     * Timed waits spin more slowly because checking the time takes
     * time. The best value relies mainly on the relative rate of
     * System.nanoTime vs memory accesses. The value is empirically
     * derived to work well across a variety of systems.
     */
    private static final int TIMED_SPINS = SPINS / 20;

    /**
     * Sentinel item representing cancellation of a wait due to
     * interruption, timeout, or elapsed spin-waits. This value is
     * placed in holes on cancellation, and used as a return value
     * from waiting methods to indicate failure to set or get hole.
     */
    private static final Object CANCEL = new Object();

    /**
     * Value representing null arguments/returns from public
     * methods. This disambiguates from internal requirement that
     * holes start out as null to mean they are not yet set.
     */
    private static final Object NULL_ITEM = new Object();

    /**
     * Nodes hold partially exchanged data. This class
     * opportunistically subclasses AtomicReference to represent the
     * hole. So get() returns hole, and compareAndSet CAS'es value
     * into hole. This class cannot be parameterized as "V" because
     * of the use of non-V CANCEL sentinels.
     */
    @SuppressWarnings("serial")
    private static final class Node extends AtomicReference<Object> {
        /** The element offered by the Thread creating this node. */
        public final Object item;

        /** The Thread waiting to be signalled; null until waiting. */
        public volatile Thread waiter;

        /**
         * Creates node with given item and empty hole.
         * @param item the item
         */
        public Node(Object item) {
            this.item = item;
        }
    }

    /**
     * A Slot is an AtomicReference with heuristic padding to lessen
     * cache effects of this heavily CAS'ed location. While the
     * padding adds noticeable space, all slots are created only on
     * demand, and there will be more than one of them only when it
     * would improve throughput more than enough to outweigh using
     * extra space.
     */
    @SuppressWarnings("serial")
    private static final class Slot extends AtomicReference<Object> {
        // Improve likelihood of isolation on <= 128 byte cache lines.
        // We used to target 64 byte cache lines, but some x86s (including
        // i7 under some BIOSes) actually use 128 byte cache lines.
        // The fifteen long fields below are never read or written; they
        // exist purely as padding.
        long q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, qa, qb, qc, qd, qe;
    }

    /**
     * Slot array. Elements are lazily initialized when needed.
     * Declared volatile to enable double-checked lazy construction.
     */
    private volatile Slot[] arena = new Slot[CAPACITY];

    /**
     * The maximum slot index being used. The value sometimes
     * increases when a thread experiences too many CAS contentions,
     * and sometimes decreases when a spin-wait elapses. Changes
     * are performed only via compareAndSet, to avoid stale values
     * when a thread happens to stall right before setting.
     */
    private final AtomicInteger max = new AtomicInteger();

    /**
     * Main exchange function, handling the different policy variants.
     * Uses Object, not "V" as argument and return value to simplify
     * handling of sentinel values. Callers from public methods decode
     * and cast accordingly.
     *
     * <p>Note that {@code timed} and {@code nanos} are consulted only
     * when this thread ends up occupying slot 0; waits in any other
     * slot are bounded spin-waits that fall back toward slot 0.
     *
     * @param item the (non-null) item to exchange
     * @param timed true if the wait is timed
     * @param nanos if timed, the maximum wait time
     * @return the other thread's item, or CANCEL if interrupted or timed out
     */
    private Object doExchange(Object item, boolean timed, long nanos) {
        Node me = new Node(item);                 // Create in case occupying
        int index = hashIndex();                  // Index of current slot
        int fails = 0;                            // Number of CAS failures

        for (;;) {
            Object y;                             // Contents of current slot
            Slot slot = arena[index];
            if (slot == null)                     // Lazily initialize slots
                createSlot(index);                // Continue loop to reread
            else if ((y = slot.get()) != null &&  // Try to fulfill
                     slot.compareAndSet(y, null)) {
                Node you = (Node)y;               // Transfer item
                if (you.compareAndSet(null, item)) {
                    LockSupport.unpark(you.waiter);
                    return you.item;
                }                                 // Else cancelled; continue
            }
            else if (y == null &&                 // Try to occupy
                     slot.compareAndSet(null, me)) {
                if (index == 0)                   // Blocking wait for slot 0
                    return timed ?
                        awaitNanos(me, slot, nanos) :
                        await(me, slot);
                Object v = spinWait(me, slot);    // Spin wait for non-0
                if (v != CANCEL)
                    return v;
                me = new Node(item);              // Throw away cancelled node
                int m = max.get();
                if (m > (index >>>= 1))           // Decrease index
                    max.compareAndSet(m, m - 1);  // Maybe shrink table
            }
            else if (++fails > 1) {               // Allow 2 fails on 1st slot
                int m = max.get();
                if (fails > 3 && m < FULL && max.compareAndSet(m, m + 1))
                    index = m + 1;                // Grow on 3rd failed slot
                else if (--index < 0)
                    index = m;                    // Circularly traverse
            }
        }
    }

    /**
     * Returns a hash index for the current thread. Uses a one-step
     * FNV-1a hash code (http://www.isthe.com/chongo/tech/comp/fnv/)
     * based on the current thread's Thread.getId(). These hash codes
     * have more uniform distribution properties with respect to small
     * moduli (here 1-31) than do other simple hashing functions.
     *
     * <p>To return an index between 0 and max, we use a cheap
     * approximation to a mod operation, that also corrects for bias
     * due to non-power-of-2 remaindering (see {@link
     * java.util.Random#nextInt}). Bits of the hashcode are masked
     * with "nbits", the ceiling power of two of table size (looked up
     * in a table packed into three ints). If too large, this is
     * retried after rotating the hash by nbits bits, while forcing new
     * top bit to 0, which guarantees eventual termination (although
     * with a non-random-bias). This requires an average of less than
     * 2 tries for all table sizes, and has a maximum 2% difference
     * from perfectly uniform slot probabilities when applied to all
     * possible hash codes for sizes less than 32.
     *
     * @return a per-thread-random index, {@code 0 <= index <= max}
     *         (the retry loop only rejects candidates greater than
     *         the current value of {@code max})
     */
    private final int hashIndex() {
        long id = Thread.currentThread().getId();
        int hash = (((int)(id ^ (id >>> 32))) ^ 0x811c9dc5) * 0x01000193;

        int m = max.get();
        int nbits = (((0xfffffc00  >> m) & 4) | // Compute ceil(log2(m+1))
                     ((0x000001f8 >>> m) & 2) | // The constants hold
                     ((0xffff00f2 >>> m) & 1)); // a lookup table
        int index;
        while ((index = hash & ((1 << nbits) - 1)) > m)       // May retry on
            hash = (hash >>> nbits) | (hash << (33 - nbits)); // non-power-2 m
        return index;
    }

    /**
     * Creates a new slot at given index. Called only when the slot
     * appears to be null. Relies on double-check using builtin
     * locks, since they rarely contend. This in turn relies on the
     * arena array being declared volatile.
     *
     * @param index the index to add slot at
     */
    private void createSlot(int index) {
        // Create slot outside of lock to narrow sync region
        Slot newSlot = new Slot();
        Slot[] a = arena;
        synchronized (a) {
            if (a[index] == null)
                a[index] = newSlot;
        }
    }

    /**
     * Tries to cancel a wait for the given node waiting in the given
     * slot, if so, helping clear the node from its slot to avoid
     * garbage retention.
     *
     * @param node the waiting node
     * @param slot the slot it is waiting in
     * @return true if successfully cancelled
     */
    private static boolean tryCancel(Node node, Slot slot) {
        if (!node.compareAndSet(null, CANCEL))
            return false;
        if (slot.get() == node) // pre-check to minimize contention
            slot.compareAndSet(node, null);
        return true;
    }

    // Three forms of waiting. Each just different enough not to merge
    // code with others.

    /**
     * Spin-waits for hole for a non-0 slot. Fails if spin elapses
     * before hole filled. Does not check interrupt, relying on check
     * in public exchange method to abort if interrupted on entry.
     *
     * @param node the waiting node
     * @param slot the slot the node is waiting in
     * @return on success, the hole; on failure, CANCEL
     */
    private static Object spinWait(Node node, Slot slot) {
        int spins = SPINS;
        for (;;) {
            Object v = node.get();
            if (v != null)
                return v;
            else if (spins > 0)
                --spins;
            else
                tryCancel(node, slot);
        }
    }

    /**
     * Waits for (by spinning and/or blocking) and gets the hole
     * filled in by another thread. Fails if interrupted before
     * hole filled.
     *
     * When a node/thread is about to block, it sets its waiter field
     * and then rechecks state at least one more time before actually
     * parking, thus covering race vs fulfiller noticing that waiter
     * is non-null so should be woken.
     *
     * Thread interruption status is checked only surrounding calls to
     * park. The caller is assumed to have checked interrupt status
     * on entry.
     *
     * @param node the waiting node
     * @param slot the slot the node is waiting in
     * @return on success, the hole; on failure, CANCEL
     */
    private static Object await(Node node, Slot slot) {
        Thread w = Thread.currentThread();
        int spins = SPINS;
        for (;;) {
            Object v = node.get();
            if (v != null)
                return v;
            else if (spins > 0)                 // Spin-wait phase
                --spins;
            else if (node.waiter == null)       // Set up to block next
                node.waiter = w;
            else if (w.isInterrupted())         // Abort on interrupt
                tryCancel(node, slot);
            else                                // Block
                LockSupport.park(node);
        }
    }

    /**
     * Waits for (at index 0) and gets the hole filled in by another
     * thread. Fails if timed out or interrupted before hole filled.
     * Same basic logic as untimed version, but a bit messier.
     *
     * @param node the waiting node
     * @param slot the slot the node is waiting in
     * @param nanos the wait time
     * @return on success, the hole; on failure, CANCEL
     */
    private Object awaitNanos(Node node, Slot slot, long nanos) {
        int spins = TIMED_SPINS;
        long deadline = 0L;
        Thread w = null;
        for (;;) {
            Object v = node.get();
            if (v != null)
                return v;
            long now = System.nanoTime();
            if (w == null) {
                // First pass: fix the deadline and remember this thread.
                deadline = now + nanos;
                w = Thread.currentThread();
            }
            else
                nanos = deadline - now;
            if (nanos > 0L) {
                if (spins > 0)
                    --spins;
                else if (node.waiter == null)
                    node.waiter = w;
                else if (w.isInterrupted())
                    tryCancel(node, slot);
                else
                    LockSupport.parkNanos(node, nanos);
            }
            else if (tryCancel(node, slot) && !w.isInterrupted())
                return scanOnTimeout(node);
        }
    }

    /**
     * Sweeps through arena checking for any waiting threads. Called
     * only upon return from timeout while waiting in slot 0. When a
     * thread gives up on a timed wait, it is possible that a
     * previously-entered thread is still waiting in some other
     * slot. So we scan to check for any. This is almost always
     * overkill, but decreases the likelihood of timeouts when there
     * are other threads present to far less than that in lock-based
     * exchangers in which earlier-arriving threads may still be
     * waiting on entry locks.
     *
     * @param node the waiting node
     * @return another thread's item, or CANCEL
     */
    private Object scanOnTimeout(Node node) {
        Object y;
        for (int j = arena.length - 1; j >= 0; --j) {
            Slot slot = arena[j];
            if (slot != null) {
                while ((y = slot.get()) != null) {
                    if (slot.compareAndSet(y, null)) {
                        Node you = (Node)y;
                        if (you.compareAndSet(null, node.item)) {
                            LockSupport.unpark(you.waiter);
                            return you.item;
                        }
                    }
                }
            }
        }
        return CANCEL;
    }

    /**
     * Creates a new Exchanger.
     */
    public Exchanger() {
    }

    /**
     * Waits for another thread to arrive at this exchange point (unless
     * the current thread is {@linkplain Thread#interrupt interrupted}),
     * and then transfers the given object to it, receiving its object
     * in return.
     *
     * <p>If another thread is already waiting at the exchange point then
     * it is resumed for thread scheduling purposes and receives the object
     * passed in by the current thread. The current thread returns immediately,
     * receiving the object passed to the exchange by that other thread.
     *
     * <p>If no other thread is already waiting at the exchange then the
     * current thread is disabled for thread scheduling purposes and lies
     * dormant until one of two things happens:
     * <ul>
     * <li>Some other thread enters the exchange; or
     * <li>Some other thread {@linkplain Thread#interrupt interrupts}
     * the current thread.
     * </ul>
     * <p>If the current thread:
     * <ul>
     * <li>has its interrupted status set on entry to this method; or
     * <li>is {@linkplain Thread#interrupt interrupted} while waiting
     * for the exchange,
     * </ul>
     * then {@link InterruptedException} is thrown and the current thread's
     * interrupted status is cleared.
     *
     * @param x the object to exchange
     * @return the object provided by the other thread
     * @throws InterruptedException if the current thread was
     *         interrupted while waiting
     */
    @SuppressWarnings("unchecked")
    public V exchange(V x) throws InterruptedException {
        if (!Thread.interrupted()) {
            Object o = doExchange((x == null) ? NULL_ITEM : x, false, 0);
            if (o == NULL_ITEM)
                return null;
            if (o != CANCEL)
                return (V)o;
            Thread.interrupted(); // Clear interrupt status on IE throw
        }
        throw new InterruptedException();
    }

    /**
     * Waits for another thread to arrive at this exchange point (unless
     * the current thread is {@linkplain Thread#interrupt interrupted} or
     * the specified waiting time elapses), and then transfers the given
     * object to it, receiving its object in return.
     *
     * <p>If another thread is already waiting at the exchange point then
     * it is resumed for thread scheduling purposes and receives the object
     * passed in by the current thread. The current thread returns immediately,
     * receiving the object passed to the exchange by that other thread.
     *
     * <p>If no other thread is already waiting at the exchange then the
     * current thread is disabled for thread scheduling purposes and lies
     * dormant until one of three things happens:
     * <ul>
     * <li>Some other thread enters the exchange; or
     * <li>Some other thread {@linkplain Thread#interrupt interrupts}
     * the current thread; or
     * <li>The specified waiting time elapses.
     * </ul>
     * <p>If the current thread:
     * <ul>
     * <li>has its interrupted status set on entry to this method; or
     * <li>is {@linkplain Thread#interrupt interrupted} while waiting
     * for the exchange,
     * </ul>
     * then {@link InterruptedException} is thrown and the current thread's
     * interrupted status is cleared.
     *
     * <p>If the specified waiting time elapses then {@link
     * TimeoutException} is thrown. If the time is less than or equal
     * to zero, the method will not wait at all.
     *
     * @param x the object to exchange
     * @param timeout the maximum time to wait
     * @param unit the time unit of the {@code timeout} argument
     * @return the object provided by the other thread
     * @throws InterruptedException if the current thread was
     *         interrupted while waiting
     * @throws TimeoutException if the specified waiting time elapses
     *         before another thread enters the exchange
     */
    @SuppressWarnings("unchecked")
    public V exchange(V x, long timeout, TimeUnit unit)
        throws InterruptedException, TimeoutException {
        if (!Thread.interrupted()) {
            Object o = doExchange((x == null) ? NULL_ITEM : x,
                                  true, unit.toNanos(timeout));
            if (o == NULL_ITEM)
                return null;
            if (o != CANCEL)
                return (V)o;
            if (!Thread.interrupted())
                throw new TimeoutException();
        }
        throw new InterruptedException();
    }
}