1 |
jsr166 |
1.1 |
/* |
2 |
|
|
* Written by Doug Lea with assistance from members of JCP JSR-166 |
3 |
|
|
* Expert Group and released to the public domain, as explained at |
4 |
|
|
* http://creativecommons.org/licenses/publicdomain |
5 |
|
|
*/ |
6 |
|
|
|
7 |
|
|
package java.util.concurrent; |
8 |
|
|
|
9 |
|
|
import java.util.Collection; |
10 |
|
|
|
11 |
|
|
/** |
12 |
|
|
* A thread managed by a {@link ForkJoinPool}. This class is |
13 |
|
|
* subclassable solely for the sake of adding functionality -- there |
14 |
jsr166 |
1.7 |
* are no overridable methods dealing with scheduling or execution. |
15 |
|
|
* However, you can override initialization and termination methods |
16 |
|
|
* surrounding the main task processing loop. If you do create such a |
17 |
|
|
* subclass, you will also need to supply a custom {@link |
18 |
|
|
* ForkJoinPool.ForkJoinWorkerThreadFactory} to use it in a {@code |
19 |
|
|
* ForkJoinPool}. |
20 |
jsr166 |
1.1 |
* |
21 |
|
|
* @since 1.7 |
22 |
|
|
* @author Doug Lea |
23 |
|
|
*/ |
24 |
|
|
public class ForkJoinWorkerThread extends Thread { |
25 |
|
|
/* |
26 |
|
|
* Algorithm overview: |
27 |
|
|
* |
28 |
|
|
* 1. Work-Stealing: Work-stealing queues are special forms of |
29 |
|
|
* Deques that support only three of the four possible |
30 |
|
|
* end-operations -- push, pop, and deq (aka steal), and only do |
31 |
|
|
* so under the constraints that push and pop are called only from |
32 |
|
|
* the owning thread, while deq may be called from other threads. |
33 |
|
|
* (If you are unfamiliar with them, you probably want to read |
34 |
|
|
* Herlihy and Shavit's book "The Art of Multiprocessor |
35 |
|
|
* programming", chapter 16 describing these in more detail before |
36 |
|
|
* proceeding.) The main work-stealing queue design is roughly |
37 |
|
|
* similar to "Dynamic Circular Work-Stealing Deque" by David |
38 |
|
|
* Chase and Yossi Lev, SPAA 2005 |
39 |
|
|
* (http://research.sun.com/scalable/pubs/index.html). The main |
40 |
|
|
* difference ultimately stems from gc requirements that we null |
41 |
|
|
* out taken slots as soon as we can, to maintain as small a |
42 |
|
|
* footprint as possible even in programs generating huge numbers |
43 |
|
|
* of tasks. To accomplish this, we shift the CAS arbitrating pop |
44 |
|
|
* vs deq (steal) from being on the indices ("base" and "sp") to |
45 |
|
|
* the slots themselves (mainly via method "casSlotNull()"). So, |
46 |
|
|
* both a successful pop and deq mainly entail CAS'ing a non-null |
47 |
|
|
* slot to null. Because we rely on CASes of references, we do |
48 |
|
|
* not need tag bits on base or sp. They are simple ints as used |
49 |
|
|
* in any circular array-based queue (see for example ArrayDeque). |
50 |
|
|
* Updates to the indices must still be ordered in a way that |
51 |
|
|
* guarantees that sp == base means the queue is empty, but |
52 |
|
|
* otherwise may err on the side of possibly making the queue |
53 |
|
|
* appear nonempty when a push, pop, or deq have not fully |
54 |
|
|
* committed. Note that this means that the deq operation, |
55 |
|
|
* considered individually, is not wait-free. One thief cannot |
56 |
|
|
* successfully continue until another in-progress one (or, if |
57 |
|
|
* previously empty, a push) completes. However, in the |
58 |
jsr166 |
1.8 |
* aggregate, we ensure at least probabilistic |
59 |
|
|
* non-blockingness. If an attempted steal fails, a thief always |
60 |
|
|
* chooses a different random victim target to try next. So, in |
61 |
|
|
* order for one thief to progress, it suffices for any |
62 |
|
|
* in-progress deq or new push on any empty queue to complete. One |
63 |
|
|
* reason this works well here is that apparently-nonempty often |
64 |
|
|
* means soon-to-be-stealable, which gives threads a chance to |
65 |
|
|
* activate if necessary before stealing (see below). |
66 |
jsr166 |
1.1 |
* |
67 |
jsr166 |
1.6 |
* This approach also enables support for "async mode" where local |
68 |
|
|
* task processing is in FIFO, not LIFO order; simply by using a |
69 |
|
|
* version of deq rather than pop when locallyFifo is true (as set |
70 |
|
|
* by the ForkJoinPool). This allows use in message-passing |
71 |
|
|
* frameworks in which tasks are never joined. |
72 |
|
|
* |
73 |
jsr166 |
1.1 |
* Efficient implementation of this approach currently relies on |
74 |
|
|
* an uncomfortable amount of "Unsafe" mechanics. To maintain |
75 |
|
|
* correct orderings, reads and writes of variable base require |
76 |
|
|
* volatile ordering. Variable sp does not require volatile write |
77 |
|
|
* but needs cheaper store-ordering on writes. Because they are |
78 |
|
|
* protected by volatile base reads, reads of the queue array and |
79 |
|
|
* its slots do not need volatile load semantics, but writes (in |
80 |
|
|
* push) require store order and CASes (in pop and deq) require |
81 |
jsr166 |
1.8 |
* (volatile) CAS semantics. (See "Idempotent work stealing" by |
82 |
|
|
* Michael, Saraswat, and Vechev, PPoPP 2009 |
83 |
|
|
* http://portal.acm.org/citation.cfm?id=1504186 for an algorithm |
84 |
|
|
* with similar properties, but without support for nulling |
85 |
|
|
* slots.) Since these combinations aren't supported using |
86 |
|
|
* ordinary volatiles, the only way to accomplish these |
87 |
|
|
* efficiently is to use direct Unsafe calls. (Using external |
88 |
jsr166 |
1.1 |
* AtomicIntegers and AtomicReferenceArrays for the indices and |
89 |
|
|
* array is significantly slower because of memory locality and |
90 |
jsr166 |
1.8 |
* indirection effects.) |
91 |
jsr166 |
1.9 |
* |
92 |
jsr166 |
1.8 |
* Further, performance on most platforms is very sensitive to |
93 |
|
|
* placement and sizing of the (resizable) queue array. Even |
94 |
|
|
* though these queues don't usually become all that big, the |
95 |
|
|
* initial size must be large enough to counteract cache |
96 |
jsr166 |
1.1 |
* contention effects across multiple queues (especially in the |
97 |
|
|
* presence of GC cardmarking). Also, to improve thread-locality, |
98 |
|
|
* queues are currently initialized immediately after the thread |
99 |
|
|
* gets the initial signal to start processing tasks. However, |
100 |
|
|
* all queue-related methods except pushTask are written in a way |
101 |
|
|
* that allows them to instead be lazily allocated and/or disposed |
102 |
|
|
* of when empty. All together, these low-level implementation |
103 |
|
|
* choices produce as much as a factor of 4 performance |
104 |
|
|
* improvement compared to naive implementations, and enable the |
105 |
|
|
* processing of billions of tasks per second, sometimes at the |
106 |
|
|
* expense of ugliness. |
107 |
|
|
* |
108 |
|
|
* 2. Run control: The primary run control is based on a global |
109 |
|
|
* counter (activeCount) held by the pool. It uses an algorithm |
110 |
|
|
* similar to that in Herlihy and Shavit section 17.6 to cause |
111 |
|
|
* threads to eventually block when all threads declare they are |
112 |
jsr166 |
1.8 |
* inactive. For this to work, threads must be declared active |
113 |
|
|
* when executing tasks, and before stealing a task. They must be |
114 |
|
|
* inactive before blocking on the Pool Barrier (awaiting a new |
115 |
|
|
* submission or other Pool event). In between, there is some free |
116 |
|
|
* play which we take advantage of to avoid contention and rapid |
117 |
|
|
* flickering of the global activeCount: If inactive, we activate |
118 |
|
|
* only if a victim queue appears to be nonempty (see above). |
119 |
|
|
* Similarly, a thread tries to inactivate only after a full scan |
120 |
|
|
* of other threads. The net effect is that contention on |
121 |
|
|
* activeCount is rarely a measurable performance issue. (There |
122 |
|
|
* are also a few other cases where we scan for work rather than |
123 |
|
|
* retry/block upon contention.) |
124 |
jsr166 |
1.1 |
* |
125 |
|
|
* 3. Selection control. We maintain policy of always choosing to |
126 |
|
|
* run local tasks rather than stealing, and always trying to |
127 |
|
|
* steal tasks before trying to run a new submission. All steals |
128 |
|
|
* are currently performed in randomly-chosen deq-order. It may be |
129 |
|
|
* worthwhile to bias these with locality / anti-locality |
130 |
|
|
* information, but doing this well probably requires more |
131 |
|
|
* lower-level information from JVMs than currently provided. |
132 |
|
|
*/ |
133 |
|
|
|
134 |
|
|
/** |
135 |
|
|
* Capacity of work-stealing queue array upon initialization. |
136 |
|
|
* Must be a power of two. Initial size must be at least 2, but is |
137 |
|
|
* padded to minimize cache effects. |
138 |
|
|
*/ |
139 |
|
|
private static final int INITIAL_QUEUE_CAPACITY = 1 << 13; |
140 |
|
|
|
141 |
|
|
/** |
142 |
|
|
* Maximum work-stealing queue array size. Must be less than or |
143 |
|
|
* equal to 1 << 28 to ensure lack of index wraparound. (This |
144 |
|
|
* is less than usual bounds, because we need leftshift by 3 |
145 |
|
|
* to be in int range). |
146 |
|
|
*/ |
147 |
|
|
private static final int MAXIMUM_QUEUE_CAPACITY = 1 << 28; |
148 |
|
|
|
149 |
|
|
/** |
150 |
|
|
* The pool this thread works in. Accessed directly by ForkJoinTask. |
151 |
|
|
*/ |
152 |
|
|
final ForkJoinPool pool; |
153 |
|
|
|
154 |
|
|
/** |
155 |
|
|
* The work-stealing queue array. Size must be a power of two. |
156 |
|
|
* Initialized when thread starts, to improve memory locality. |
157 |
|
|
*/ |
158 |
|
|
private ForkJoinTask<?>[] queue; |
159 |
|
|
|
160 |
|
|
/** |
161 |
|
|
* Index (mod queue.length) of next queue slot to push to or pop |
162 |
|
|
* from. It is written only by owner thread, via ordered store. |
163 |
|
|
* Both sp and base are allowed to wrap around on overflow, but |
164 |
|
|
* (sp - base) still estimates size. |
165 |
|
|
*/ |
166 |
|
|
private volatile int sp; |
167 |
|
|
|
168 |
|
|
/** |
169 |
|
|
* Index (mod queue.length) of least valid queue slot, which is |
170 |
|
|
* always the next position to steal from if nonempty. |
171 |
|
|
*/ |
172 |
|
|
private volatile int base; |
173 |
|
|
|
174 |
|
|
/** |
175 |
|
|
* Activity status. When true, this worker is considered active. |
176 |
|
|
* Must be false upon construction. It must be true when executing |
177 |
|
|
* tasks, and BEFORE stealing a task. It must be false before |
178 |
|
|
* calling pool.sync. |
179 |
|
|
*/ |
180 |
|
|
private boolean active; |
181 |
|
|
|
182 |
|
|
/** |
183 |
|
|
* Run state of this worker. Supports simple versions of the usual |
184 |
|
|
* shutdown/shutdownNow control. |
185 |
|
|
*/ |
186 |
|
|
private volatile int runState; |
187 |
|
|
|
188 |
|
|
/** |
189 |
|
|
* Seed for random number generator for choosing steal victims. |
190 |
|
|
* Uses Marsaglia xorshift. Must be nonzero upon initialization. |
191 |
|
|
*/ |
192 |
|
|
private int seed; |
193 |
|
|
|
194 |
|
|
/** |
195 |
|
|
* Number of steals, transferred to pool when idle |
196 |
|
|
*/ |
197 |
|
|
private int stealCount; |
198 |
|
|
|
199 |
|
|
/** |
200 |
|
|
* Index of this worker in pool array. Set once by pool before |
201 |
|
|
* running, and accessed directly by pool during cleanup etc. |
202 |
|
|
*/ |
203 |
|
|
int poolIndex; |
204 |
|
|
|
205 |
|
|
/** |
206 |
|
|
* The last barrier event waited for. Accessed in pool callback |
207 |
|
|
* methods, but only by current thread. |
208 |
|
|
*/ |
209 |
|
|
long lastEventCount; |
210 |
|
|
|
211 |
|
|
/** |
212 |
|
|
* True if use local fifo, not default lifo, for local polling |
213 |
|
|
*/ |
214 |
|
|
private boolean locallyFifo; |
215 |
|
|
|
216 |
|
|
/**
 * Creates a ForkJoinWorkerThread operating in the given pool.
 * Only the pool reference is recorded here; the queue and random
 * seed are set up later in {@code onStart}.
 *
 * @param pool the pool this thread works in
 * @throws NullPointerException if pool is null
 */
protected ForkJoinWorkerThread(ForkJoinPool pool) {
    if (pool == null) {
        throw new NullPointerException();
    }
    this.pool = pool;
    // Note: poolIndex is set by pool during construction
    // Remaining initialization is deferred to onStart
}
228 |
|
|
|
229 |
|
|
// Public access methods |
230 |
|
|
|
231 |
|
|
/** |
232 |
|
|
* Returns the pool hosting this thread. |
233 |
|
|
* |
234 |
|
|
* @return the pool |
235 |
|
|
*/ |
236 |
|
|
public ForkJoinPool getPool() { |
237 |
|
|
return pool; |
238 |
|
|
} |
239 |
|
|
|
240 |
|
|
/** |
241 |
|
|
* Returns the index number of this thread in its pool. The |
242 |
|
|
* returned value ranges from zero to the maximum number of |
243 |
|
|
* threads (minus one) that have ever been created in the pool. |
244 |
|
|
* This method may be useful for applications that track status or |
245 |
|
|
* collect results per-worker rather than per-task. |
246 |
|
|
* |
247 |
|
|
* @return the index number |
248 |
|
|
*/ |
249 |
|
|
public int getPoolIndex() { |
250 |
|
|
return poolIndex; |
251 |
|
|
} |
252 |
|
|
|
253 |
|
|
/** |
254 |
|
|
* Establishes local first-in-first-out scheduling mode for forked |
255 |
|
|
* tasks that are never joined. |
256 |
|
|
* |
257 |
|
|
* @param async if true, use locally FIFO scheduling |
258 |
|
|
*/ |
259 |
|
|
void setAsyncMode(boolean async) { |
260 |
|
|
locallyFifo = async; |
261 |
|
|
} |
262 |
|
|
|
263 |
|
|
// Runstate management |
264 |
|
|
|
265 |
|
|
// Runstate values. Order matters |
266 |
|
|
private static final int RUNNING = 0; |
267 |
|
|
private static final int SHUTDOWN = 1; |
268 |
|
|
private static final int TERMINATING = 2; |
269 |
|
|
private static final int TERMINATED = 3; |
270 |
|
|
|
271 |
|
|
final boolean isShutdown() { return runState >= SHUTDOWN; } |
272 |
|
|
final boolean isTerminating() { return runState >= TERMINATING; } |
273 |
|
|
final boolean isTerminated() { return runState == TERMINATED; } |
274 |
|
|
final boolean shutdown() { return transitionRunStateTo(SHUTDOWN); } |
275 |
|
|
final boolean shutdownNow() { return transitionRunStateTo(TERMINATING); } |
276 |
|
|
|
277 |
|
|
/** |
278 |
jsr166 |
1.4 |
* Transitions to at least the given state. |
279 |
|
|
* |
280 |
|
|
* @return {@code true} if not already at least at given state |
281 |
jsr166 |
1.1 |
*/ |
282 |
|
|
private boolean transitionRunStateTo(int state) { |
283 |
|
|
for (;;) { |
284 |
|
|
int s = runState; |
285 |
|
|
if (s >= state) |
286 |
|
|
return false; |
287 |
|
|
if (UNSAFE.compareAndSwapInt(this, runStateOffset, s, state)) |
288 |
|
|
return true; |
289 |
|
|
} |
290 |
|
|
} |
291 |
|
|
|
292 |
|
|
/** |
293 |
|
|
* Tries to set status to active; fails on contention. |
294 |
|
|
*/ |
295 |
|
|
private boolean tryActivate() { |
296 |
|
|
if (!active) { |
297 |
|
|
if (!pool.tryIncrementActiveCount()) |
298 |
|
|
return false; |
299 |
|
|
active = true; |
300 |
|
|
} |
301 |
|
|
return true; |
302 |
|
|
} |
303 |
|
|
|
304 |
|
|
/** |
305 |
|
|
* Tries to set status to inactive; fails on contention. |
306 |
|
|
*/ |
307 |
|
|
private boolean tryInactivate() { |
308 |
|
|
if (active) { |
309 |
|
|
if (!pool.tryDecrementActiveCount()) |
310 |
|
|
return false; |
311 |
|
|
active = false; |
312 |
|
|
} |
313 |
|
|
return true; |
314 |
|
|
} |
315 |
|
|
|
316 |
|
|
/** |
317 |
|
|
* Computes next value for random victim probe. Scans don't |
318 |
|
|
* require a very high quality generator, but also not a crummy |
319 |
|
|
* one. Marsaglia xor-shift is cheap and works well. |
320 |
|
|
*/ |
321 |
|
|
private static int xorShift(int r) { |
322 |
jsr166 |
1.6 |
r ^= (r << 13); |
323 |
|
|
r ^= (r >>> 17); |
324 |
|
|
return r ^ (r << 5); |
325 |
jsr166 |
1.1 |
} |
326 |
|
|
|
327 |
|
|
// Lifecycle methods |
328 |
|
|
|
329 |
|
|
/** |
330 |
|
|
* This method is required to be public, but should never be |
331 |
|
|
* called explicitly. It performs the main run loop to execute |
332 |
|
|
* ForkJoinTasks. |
333 |
|
|
*/ |
334 |
|
|
public void run() { |
335 |
|
|
Throwable exception = null; |
336 |
|
|
try { |
337 |
|
|
onStart(); |
338 |
|
|
pool.sync(this); // await first pool event |
339 |
|
|
mainLoop(); |
340 |
|
|
} catch (Throwable ex) { |
341 |
|
|
exception = ex; |
342 |
|
|
} finally { |
343 |
|
|
onTermination(exception); |
344 |
|
|
} |
345 |
|
|
} |
346 |
|
|
|
347 |
|
|
/** |
348 |
|
|
* Executes tasks until shut down. |
349 |
|
|
*/ |
350 |
|
|
private void mainLoop() { |
351 |
|
|
while (!isShutdown()) { |
352 |
|
|
ForkJoinTask<?> t = pollTask(); |
353 |
|
|
if (t != null || (t = pollSubmission()) != null) |
354 |
|
|
t.quietlyExec(); |
355 |
|
|
else if (tryInactivate()) |
356 |
|
|
pool.sync(this); |
357 |
|
|
} |
358 |
|
|
} |
359 |
|
|
|
360 |
|
|
/** |
361 |
|
|
* Initializes internal state after construction but before |
362 |
|
|
* processing any tasks. If you override this method, you must |
363 |
|
|
* invoke super.onStart() at the beginning of the method. |
364 |
|
|
* Initialization requires care: Most fields must have legal |
365 |
|
|
* default values, to ensure that attempted accesses from other |
366 |
|
|
* threads work correctly even before this thread starts |
367 |
|
|
* processing tasks. |
368 |
|
|
*/ |
369 |
|
|
protected void onStart() { |
370 |
|
|
// Allocate while starting to improve chances of thread-local |
371 |
|
|
// isolation |
372 |
|
|
queue = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY]; |
373 |
|
|
// Initial value of seed need not be especially random but |
374 |
|
|
// should differ across workers and must be nonzero |
375 |
|
|
int p = poolIndex + 1; |
376 |
|
|
seed = p + (p << 8) + (p << 16) + (p << 24); // spread bits |
377 |
|
|
} |
378 |
|
|
|
379 |
|
|
/** |
380 |
|
|
* Performs cleanup associated with termination of this worker |
381 |
|
|
* thread. If you override this method, you must invoke |
382 |
|
|
* {@code super.onTermination} at the end of the overridden method. |
383 |
|
|
* |
384 |
|
|
* @param exception the exception causing this thread to abort due |
385 |
jsr166 |
1.4 |
* to an unrecoverable error, or {@code null} if completed normally |
386 |
jsr166 |
1.1 |
*/ |
387 |
|
|
protected void onTermination(Throwable exception) { |
388 |
|
|
// Execute remaining local tasks unless aborting or terminating |
389 |
jsr166 |
1.8 |
while (exception == null && pool.isProcessingTasks() && base != sp) { |
390 |
jsr166 |
1.1 |
try { |
391 |
|
|
ForkJoinTask<?> t = popTask(); |
392 |
|
|
if (t != null) |
393 |
|
|
t.quietlyExec(); |
394 |
|
|
} catch (Throwable ex) { |
395 |
|
|
exception = ex; |
396 |
|
|
} |
397 |
|
|
} |
398 |
|
|
// Cancel other tasks, transition status, notify pool, and |
399 |
|
|
// propagate exception to uncaught exception handler |
400 |
|
|
try { |
401 |
|
|
do {} while (!tryInactivate()); // ensure inactive |
402 |
|
|
cancelTasks(); |
403 |
|
|
runState = TERMINATED; |
404 |
|
|
pool.workerTerminated(this); |
405 |
|
|
} catch (Throwable ex) { // Shouldn't ever happen |
406 |
|
|
if (exception == null) // but if so, at least rethrown |
407 |
|
|
exception = ex; |
408 |
|
|
} finally { |
409 |
|
|
if (exception != null) |
410 |
|
|
ForkJoinTask.rethrowException(exception); |
411 |
|
|
} |
412 |
|
|
} |
413 |
|
|
|
414 |
|
|
// Intrinsics-based support for queue operations. |
415 |
|
|
|
416 |
jsr166 |
1.10 |
private static long slotOffset(int i) { |
417 |
|
|
return ((long) i << qShift) + qBase; |
418 |
|
|
} |
419 |
|
|
|
420 |
jsr166 |
1.1 |
/** |
421 |
|
|
* Adds in store-order the given task at given slot of q to null. |
422 |
|
|
* Caller must ensure q is non-null and index is in range. |
423 |
|
|
*/ |
424 |
|
|
private static void setSlot(ForkJoinTask<?>[] q, int i, |
425 |
|
|
ForkJoinTask<?> t) { |
426 |
jsr166 |
1.10 |
UNSAFE.putOrderedObject(q, slotOffset(i), t); |
427 |
jsr166 |
1.1 |
} |
428 |
|
|
|
429 |
|
|
/** |
430 |
|
|
* CAS given slot of q to null. Caller must ensure q is non-null |
431 |
|
|
* and index is in range. |
432 |
|
|
*/ |
433 |
|
|
private static boolean casSlotNull(ForkJoinTask<?>[] q, int i, |
434 |
|
|
ForkJoinTask<?> t) { |
435 |
jsr166 |
1.10 |
return UNSAFE.compareAndSwapObject(q, slotOffset(i), t, null); |
436 |
jsr166 |
1.1 |
} |
437 |
|
|
|
438 |
|
|
/** |
439 |
|
|
* Sets sp in store-order. |
440 |
|
|
*/ |
441 |
|
|
private void storeSp(int s) { |
442 |
|
|
UNSAFE.putOrderedInt(this, spOffset, s); |
443 |
|
|
} |
444 |
|
|
|
445 |
|
|
// Main queue methods |
446 |
|
|
|
447 |
|
|
/** |
448 |
|
|
* Pushes a task. Called only by current thread. |
449 |
|
|
* |
450 |
|
|
* @param t the task. Caller must ensure non-null. |
451 |
|
|
*/ |
452 |
|
|
final void pushTask(ForkJoinTask<?> t) { |
453 |
|
|
ForkJoinTask<?>[] q = queue; |
454 |
|
|
int mask = q.length - 1; |
455 |
|
|
int s = sp; |
456 |
|
|
setSlot(q, s & mask, t); |
457 |
|
|
storeSp(++s); |
458 |
|
|
if ((s -= base) == 1) |
459 |
|
|
pool.signalWork(); |
460 |
|
|
else if (s >= mask) |
461 |
|
|
growQueue(); |
462 |
|
|
} |
463 |
|
|
|
464 |
|
|
/** |
465 |
|
|
* Tries to take a task from the base of the queue, failing if |
466 |
|
|
* either empty or contended. |
467 |
|
|
* |
468 |
|
|
* @return a task, or null if none or contended |
469 |
|
|
*/ |
470 |
|
|
final ForkJoinTask<?> deqTask() { |
471 |
|
|
ForkJoinTask<?> t; |
472 |
|
|
ForkJoinTask<?>[] q; |
473 |
|
|
int i; |
474 |
|
|
int b; |
475 |
|
|
if (sp != (b = base) && |
476 |
|
|
(q = queue) != null && // must read q after b |
477 |
|
|
(t = q[i = (q.length - 1) & b]) != null && |
478 |
|
|
casSlotNull(q, i, t)) { |
479 |
|
|
base = b + 1; |
480 |
|
|
return t; |
481 |
|
|
} |
482 |
|
|
return null; |
483 |
|
|
} |
484 |
|
|
|
485 |
|
|
/** |
486 |
jsr166 |
1.6 |
* Tries to take a task from the base of own queue, activating if |
487 |
|
|
* necessary, failing only if empty. Called only by current thread. |
488 |
|
|
* |
489 |
|
|
* @return a task, or null if none |
490 |
|
|
*/ |
491 |
|
|
final ForkJoinTask<?> locallyDeqTask() { |
492 |
|
|
int b; |
493 |
|
|
while (sp != (b = base)) { |
494 |
|
|
if (tryActivate()) { |
495 |
|
|
ForkJoinTask<?>[] q = queue; |
496 |
|
|
int i = (q.length - 1) & b; |
497 |
|
|
ForkJoinTask<?> t = q[i]; |
498 |
|
|
if (t != null && casSlotNull(q, i, t)) { |
499 |
|
|
base = b + 1; |
500 |
|
|
return t; |
501 |
|
|
} |
502 |
|
|
} |
503 |
|
|
} |
504 |
|
|
return null; |
505 |
|
|
} |
506 |
|
|
|
507 |
|
|
/** |
508 |
jsr166 |
1.1 |
* Returns a popped task, or null if empty. Ensures active status |
509 |
|
|
* if non-null. Called only by current thread. |
510 |
|
|
*/ |
511 |
|
|
final ForkJoinTask<?> popTask() { |
512 |
|
|
int s = sp; |
513 |
|
|
while (s != base) { |
514 |
|
|
if (tryActivate()) { |
515 |
|
|
ForkJoinTask<?>[] q = queue; |
516 |
|
|
int mask = q.length - 1; |
517 |
|
|
int i = (s - 1) & mask; |
518 |
|
|
ForkJoinTask<?> t = q[i]; |
519 |
|
|
if (t == null || !casSlotNull(q, i, t)) |
520 |
|
|
break; |
521 |
|
|
storeSp(s - 1); |
522 |
|
|
return t; |
523 |
|
|
} |
524 |
|
|
} |
525 |
|
|
return null; |
526 |
|
|
} |
527 |
|
|
|
528 |
|
|
/** |
529 |
|
|
* Specialized version of popTask to pop only if |
530 |
|
|
* topmost element is the given task. Called only |
531 |
|
|
* by current thread while active. |
532 |
|
|
* |
533 |
|
|
* @param t the task. Caller must ensure non-null. |
534 |
|
|
*/ |
535 |
|
|
final boolean unpushTask(ForkJoinTask<?> t) { |
536 |
|
|
ForkJoinTask<?>[] q = queue; |
537 |
|
|
int mask = q.length - 1; |
538 |
|
|
int s = sp - 1; |
539 |
|
|
if (casSlotNull(q, s & mask, t)) { |
540 |
|
|
storeSp(s); |
541 |
|
|
return true; |
542 |
|
|
} |
543 |
|
|
return false; |
544 |
|
|
} |
545 |
|
|
|
546 |
|
|
/** |
547 |
jsr166 |
1.6 |
* Returns next task or null if empty or contended |
548 |
jsr166 |
1.1 |
*/ |
549 |
|
|
final ForkJoinTask<?> peekTask() { |
550 |
|
|
ForkJoinTask<?>[] q = queue; |
551 |
|
|
if (q == null) |
552 |
|
|
return null; |
553 |
|
|
int mask = q.length - 1; |
554 |
|
|
int i = locallyFifo ? base : (sp - 1); |
555 |
|
|
return q[i & mask]; |
556 |
|
|
} |
557 |
|
|
|
558 |
|
|
/** |
559 |
|
|
* Doubles queue array size. Transfers elements by emulating |
560 |
|
|
* steals (deqs) from old array and placing, oldest first, into |
561 |
|
|
* new array. |
562 |
|
|
*/ |
563 |
|
|
private void growQueue() { |
564 |
|
|
ForkJoinTask<?>[] oldQ = queue; |
565 |
|
|
int oldSize = oldQ.length; |
566 |
|
|
int newSize = oldSize << 1; |
567 |
|
|
if (newSize > MAXIMUM_QUEUE_CAPACITY) |
568 |
|
|
throw new RejectedExecutionException("Queue capacity exceeded"); |
569 |
|
|
ForkJoinTask<?>[] newQ = queue = new ForkJoinTask<?>[newSize]; |
570 |
|
|
|
571 |
|
|
int b = base; |
572 |
|
|
int bf = b + oldSize; |
573 |
|
|
int oldMask = oldSize - 1; |
574 |
|
|
int newMask = newSize - 1; |
575 |
|
|
do { |
576 |
|
|
int oldIndex = b & oldMask; |
577 |
|
|
ForkJoinTask<?> t = oldQ[oldIndex]; |
578 |
|
|
if (t != null && !casSlotNull(oldQ, oldIndex, t)) |
579 |
|
|
t = null; |
580 |
|
|
setSlot(newQ, b & newMask, t); |
581 |
|
|
} while (++b != bf); |
582 |
|
|
pool.signalWork(); |
583 |
|
|
} |
584 |
|
|
|
585 |
|
|
/** |
586 |
|
|
* Tries to steal a task from another worker. Starts at a random |
587 |
|
|
* index of workers array, and probes workers until finding one |
588 |
|
|
* with non-empty queue or finding that all are empty. It |
589 |
|
|
* randomly selects the first n probes. If these are empty, it |
590 |
|
|
* resorts to a full circular traversal, which is necessary to |
591 |
|
|
* accurately set active status by caller. Also restarts if pool |
592 |
|
|
* events occurred since last scan, which forces refresh of |
593 |
|
|
* workers array, in case barrier was associated with resize. |
594 |
|
|
* |
595 |
|
|
* This method must be both fast and quiet -- usually avoiding |
596 |
|
|
* memory accesses that could disrupt cache sharing etc other than |
597 |
|
|
* those needed to check for and take tasks. This accounts for, |
598 |
|
|
* among other things, updating random seed in place without |
599 |
|
|
* storing it until exit. |
600 |
|
|
* |
601 |
|
|
* @return a task, or null if none found |
602 |
|
|
*/ |
603 |
|
|
private ForkJoinTask<?> scan() { |
604 |
|
|
ForkJoinTask<?> t = null; |
605 |
|
|
int r = seed; // extract once to keep scan quiet |
606 |
|
|
ForkJoinWorkerThread[] ws; // refreshed on outer loop |
607 |
|
|
int mask; // must be power 2 minus 1 and > 0 |
608 |
|
|
outer:do { |
609 |
|
|
if ((ws = pool.workers) != null && (mask = ws.length - 1) > 0) { |
610 |
|
|
int idx = r; |
611 |
|
|
int probes = ~mask; // use random index while negative |
612 |
|
|
for (;;) { |
613 |
|
|
r = xorShift(r); // update random seed |
614 |
|
|
ForkJoinWorkerThread v = ws[mask & idx]; |
615 |
|
|
if (v == null || v.sp == v.base) { |
616 |
|
|
if (probes <= mask) |
617 |
|
|
idx = (probes++ < 0) ? r : (idx + 1); |
618 |
|
|
else |
619 |
|
|
break; |
620 |
|
|
} |
621 |
|
|
else if (!tryActivate() || (t = v.deqTask()) == null) |
622 |
|
|
continue outer; // restart on contention |
623 |
|
|
else |
624 |
|
|
break outer; |
625 |
|
|
} |
626 |
|
|
} |
627 |
|
|
} while (pool.hasNewSyncEvent(this)); // retry on pool events |
628 |
|
|
seed = r; |
629 |
|
|
return t; |
630 |
|
|
} |
631 |
|
|
|
632 |
|
|
/** |
633 |
|
|
* Gets and removes a local or stolen task. |
634 |
|
|
* |
635 |
|
|
* @return a task, if available |
636 |
|
|
*/ |
637 |
|
|
final ForkJoinTask<?> pollTask() { |
638 |
jsr166 |
1.6 |
ForkJoinTask<?> t = locallyFifo ? locallyDeqTask() : popTask(); |
639 |
jsr166 |
1.1 |
if (t == null && (t = scan()) != null) |
640 |
|
|
++stealCount; |
641 |
|
|
return t; |
642 |
|
|
} |
643 |
|
|
|
644 |
|
|
/** |
645 |
|
|
* Gets a local task. |
646 |
|
|
* |
647 |
|
|
* @return a task, if available |
648 |
|
|
*/ |
649 |
|
|
final ForkJoinTask<?> pollLocalTask() { |
650 |
jsr166 |
1.6 |
return locallyFifo ? locallyDeqTask() : popTask(); |
651 |
jsr166 |
1.1 |
} |
652 |
|
|
|
653 |
|
|
/** |
654 |
|
|
* Returns a pool submission, if one exists, activating first. |
655 |
|
|
* |
656 |
|
|
* @return a submission, if available |
657 |
|
|
*/ |
658 |
|
|
private ForkJoinTask<?> pollSubmission() { |
659 |
|
|
ForkJoinPool p = pool; |
660 |
|
|
while (p.hasQueuedSubmissions()) { |
661 |
|
|
ForkJoinTask<?> t; |
662 |
|
|
if (tryActivate() && (t = p.pollSubmission()) != null) |
663 |
|
|
return t; |
664 |
|
|
} |
665 |
|
|
return null; |
666 |
|
|
} |
667 |
|
|
|
668 |
|
|
// Methods accessed only by Pool |
669 |
|
|
|
670 |
|
|
/** |
671 |
|
|
* Removes and cancels all tasks in queue. Can be called from any |
672 |
|
|
* thread. |
673 |
|
|
*/ |
674 |
|
|
final void cancelTasks() { |
675 |
|
|
ForkJoinTask<?> t; |
676 |
|
|
while (base != sp && (t = deqTask()) != null) |
677 |
|
|
t.cancelIgnoringExceptions(); |
678 |
|
|
} |
679 |
|
|
|
680 |
|
|
/** |
681 |
|
|
* Drains tasks to given collection c. |
682 |
|
|
* |
683 |
|
|
* @return the number of tasks drained |
684 |
|
|
*/ |
685 |
jsr166 |
1.5 |
final int drainTasksTo(Collection<? super ForkJoinTask<?>> c) { |
686 |
jsr166 |
1.1 |
int n = 0; |
687 |
|
|
ForkJoinTask<?> t; |
688 |
|
|
while (base != sp && (t = deqTask()) != null) { |
689 |
|
|
c.add(t); |
690 |
|
|
++n; |
691 |
|
|
} |
692 |
|
|
return n; |
693 |
|
|
} |
694 |
|
|
|
695 |
|
|
/** |
696 |
|
|
* Gets and clears steal count for accumulation by pool. Called |
697 |
|
|
* only when known to be idle (in pool.sync and termination). |
698 |
|
|
*/ |
699 |
|
|
final int getAndClearStealCount() { |
700 |
|
|
int sc = stealCount; |
701 |
|
|
stealCount = 0; |
702 |
|
|
return sc; |
703 |
|
|
} |
704 |
|
|
|
705 |
|
|
/** |
706 |
jsr166 |
1.4 |
* Returns {@code true} if at least one worker in the given array |
707 |
|
|
* appears to have at least one queued task. |
708 |
jsr166 |
1.1 |
* |
709 |
|
|
* @param ws array of workers |
710 |
|
|
*/ |
711 |
|
|
static boolean hasQueuedTasks(ForkJoinWorkerThread[] ws) { |
712 |
|
|
if (ws != null) { |
713 |
|
|
int len = ws.length; |
714 |
|
|
for (int j = 0; j < 2; ++j) { // need two passes for clean sweep |
715 |
|
|
for (int i = 0; i < len; ++i) { |
716 |
|
|
ForkJoinWorkerThread w = ws[i]; |
717 |
|
|
if (w != null && w.sp != w.base) |
718 |
|
|
return true; |
719 |
|
|
} |
720 |
|
|
} |
721 |
|
|
} |
722 |
|
|
return false; |
723 |
|
|
} |
724 |
|
|
|
725 |
|
|
// Support methods for ForkJoinTask |
726 |
|
|
|
727 |
|
|
/** |
728 |
|
|
* Returns an estimate of the number of tasks in the queue. |
729 |
|
|
*/ |
730 |
|
|
final int getQueueSize() { |
731 |
|
|
// suppress momentarily negative values |
732 |
|
|
return Math.max(0, sp - base); |
733 |
|
|
} |
734 |
|
|
|
735 |
|
|
/** |
736 |
|
|
* Returns an estimate of the number of tasks, offset by a |
737 |
|
|
* function of number of idle workers. |
738 |
|
|
*/ |
739 |
|
|
final int getEstimatedSurplusTaskCount() { |
740 |
dl |
1.11 |
/* |
741 |
|
|
* The goal here is to provide a very cheap heuristic guide |
742 |
|
|
* for task partitioning when programmers, frameworks, tools, |
743 |
|
|
* or languages have little or no idea about task granularity. |
744 |
|
|
* In essence by offering this method, we ask users only about |
745 |
|
|
* tradeoffs in overhead vs expected throughput and its |
746 |
|
|
* variance, rather than how finely to partition tasks. |
747 |
|
|
* |
748 |
|
|
* In a steady state strict (tree-structured) computation, |
749 |
|
|
* each thread makes available for stealing enough tasks for |
750 |
|
|
* other threads to remain active. Inductively, if all threads |
751 |
|
|
* play by the same rules, each thread should make available |
752 |
|
|
* only a constant number of tasks. |
753 |
|
|
* |
754 |
|
|
* The minimum useful constant is just 1. But using a value of |
755 |
|
|
* 1 would require immediate replenishment upon each steal to |
756 |
|
|
* maintain enough tasks, which is infeasible. Further, |
757 |
|
|
* partitionings/granularities of offered tasks should |
758 |
|
|
* minimize steal rates, which in general means that threads |
759 |
|
|
* nearer the top of computation tree should generate more |
760 |
|
|
* than those nearer the bottom. In perfect steady state, each |
761 |
|
|
* thread is at approximately the same level of computation |
762 |
|
|
* tree. However, producing extra tasks amortizes the |
763 |
|
|
* uncertainty of progress and diffusion assumptions. |
764 |
|
|
* |
765 |
|
|
* So, users will want to use values larger, but not much |
766 |
|
|
* larger than 1 to both smooth over transient shortages and |
767 |
|
|
* hedge against uneven progress; as traded off against the |
768 |
|
|
* cost of extra task overhead. We leave the user to pick a |
769 |
|
|
* threshold value to compare with the results of this call to |
770 |
|
|
* guide decisions, but recommend values such as 3. |
771 |
|
|
* |
772 |
|
|
* When all threads are active, it is on average OK to |
773 |
|
|
* estimate surplus strictly locally. In steady-state, if one |
774 |
|
|
* thread is maintaining say 2 surplus tasks, then so are |
775 |
|
|
* others. So we can just use estimated queue length (although |
776 |
|
|
* note that (sp - base) can be an overestimate because of |
777 |
|
|
* stealers lagging increments of base). |
778 |
|
|
* |
779 |
|
|
* However, this strategy alone leads to serious mis-estimates |
780 |
|
|
* in some non-steady-state conditions (ramp-up, ramp-down, |
781 |
|
|
* other stalls). We can detect many of these by further |
782 |
|
|
* considering the number of "idle" threads, that are known to |
783 |
|
|
* have zero queued tasks. A straight compensation would lead |
784 |
|
|
* to weighting of the queued task estimate by a function of |
785 |
|
|
* the proportion of idle threads. However, we don't want to |
786 |
|
|
* waste much calculation for the sake of weightings that only |
787 |
|
|
* apply transiently, so cheapen this by (a) not bothering to |
788 |
|
|
* weight at all unless there is more than one queued task (b) |
789 |
|
|
* rather than compensating by a factor of (#idle/#active) |
790 |
|
|
* threads, we just substract out a function of #idle that is |
791 |
|
|
* a good enough approximation for conditions near the |
792 |
|
|
* borderlines for threshold testing. This errs in the |
793 |
|
|
* direction of reporting more extreme lack of surplus (as in |
794 |
|
|
* returning negative values) in cases where users should |
795 |
|
|
* almost surely be generating tasks anyway. |
796 |
|
|
*/ |
797 |
|
|
int n = sp - base; |
798 |
|
|
return n > 1? n - (pool.getIdleThreadCount() >>> 2) : n; |
799 |
jsr166 |
1.1 |
} |
800 |
|
|
|
801 |
|
|
/** |
802 |
|
|
* Scans, returning early if joinMe done. |
803 |
|
|
*/ |
804 |
|
|
final ForkJoinTask<?> scanWhileJoining(ForkJoinTask<?> joinMe) { |
805 |
|
|
ForkJoinTask<?> t = pollTask(); |
806 |
|
|
if (t != null && joinMe.status < 0 && sp == base) { |
807 |
|
|
pushTask(t); // unsteal if done and this task would be stealable |
808 |
|
|
t = null; |
809 |
|
|
} |
810 |
|
|
return t; |
811 |
|
|
} |
812 |
|
|
|
813 |
|
|
/** |
814 |
|
|
* Runs tasks until {@code pool.isQuiescent()}. |
815 |
|
|
*/ |
816 |
|
|
final void helpQuiescePool() { |
817 |
|
|
for (;;) { |
818 |
|
|
ForkJoinTask<?> t = pollTask(); |
819 |
|
|
if (t != null) |
820 |
|
|
t.quietlyExec(); |
821 |
|
|
else if (tryInactivate() && pool.isQuiescent()) |
822 |
|
|
break; |
823 |
|
|
} |
824 |
|
|
do {} while (!tryActivate()); // re-activate on exit |
825 |
|
|
} |
826 |
|
|
|
827 |
|
|
// Unsafe mechanics |
828 |
|
|
|
829 |
|
|
// Declaration order matters: UNSAFE must be initialized before the
// offset fields below, whose initializers call objectFieldOffset.
private static final sun.misc.Unsafe UNSAFE = sun.misc.Unsafe.getUnsafe();
private static final long spOffset =
    objectFieldOffset("sp", ForkJoinWorkerThread.class);
private static final long runStateOffset =
    objectFieldOffset("runState", ForkJoinWorkerThread.class);
// Base offset and log2 of the element scale for ForkJoinTask[] arrays.
private static final long qBase;
private static final int qShift;

static {
    final Class<?> taskArrayClass = ForkJoinTask[].class;
    final long elementBase = UNSAFE.arrayBaseOffset(taskArrayClass);
    final int scale = UNSAFE.arrayIndexScale(taskArrayClass);
    // The scale is later applied as a shift, so it must be a power of two.
    if ((scale & (scale - 1)) != 0)
        throw new Error("data type scale not a power of two");
    qBase = elementBase;
    qShift = 31 - Integer.numberOfLeadingZeros(scale);
}
844 |
jsr166 |
1.3 |
|
845 |
|
|
private static long objectFieldOffset(String field, Class<?> klazz) { |
846 |
|
|
try { |
847 |
|
|
return UNSAFE.objectFieldOffset(klazz.getDeclaredField(field)); |
848 |
|
|
} catch (NoSuchFieldException e) { |
849 |
|
|
// Convert Exception to corresponding Error |
850 |
|
|
NoSuchFieldError error = new NoSuchFieldError(field); |
851 |
|
|
error.initCause(e); |
852 |
|
|
throw error; |
853 |
|
|
} |
854 |
|
|
} |
855 |
jsr166 |
1.1 |
} |