1 |
jsr166 |
1.1 |
/* |
2 |
|
|
* Written by Doug Lea with assistance from members of JCP JSR-166 |
3 |
|
|
* Expert Group and released to the public domain, as explained at |
4 |
|
|
* http://creativecommons.org/licenses/publicdomain |
5 |
|
|
*/ |
6 |
|
|
|
7 |
|
|
package java.util.concurrent; |
8 |
|
|
|
9 |
dl |
1.14 |
import java.util.Random; |
10 |
jsr166 |
1.1 |
import java.util.Collection; |
11 |
dl |
1.14 |
import java.util.concurrent.locks.LockSupport; |
12 |
jsr166 |
1.1 |
|
13 |
|
|
/** |
14 |
|
|
* A thread managed by a {@link ForkJoinPool}. This class is |
15 |
|
|
* subclassable solely for the sake of adding functionality -- there |
16 |
jsr166 |
1.7 |
* are no overridable methods dealing with scheduling or execution. |
17 |
|
|
* However, you can override initialization and termination methods |
18 |
|
|
* surrounding the main task processing loop. If you do create such a |
19 |
|
|
* subclass, you will also need to supply a custom {@link |
20 |
|
|
* ForkJoinPool.ForkJoinWorkerThreadFactory} to use it in a {@code |
21 |
|
|
* ForkJoinPool}. |
22 |
jsr166 |
1.1 |
* |
23 |
|
|
* @since 1.7 |
24 |
|
|
* @author Doug Lea |
25 |
|
|
*/ |
26 |
|
|
public class ForkJoinWorkerThread extends Thread { |
27 |
|
|
/* |
28 |
dl |
1.14 |
* Overview: |
29 |
jsr166 |
1.1 |
* |
30 |
dl |
1.14 |
* ForkJoinWorkerThreads are managed by ForkJoinPools and perform |
31 |
|
|
* ForkJoinTasks. This class includes bookkeeping in support of |
32 |
|
|
* worker activation, suspension, and lifecycle control described |
33 |
|
|
* in more detail in the internal documentation of class |
34 |
|
|
* ForkJoinPool. And as described further below, this class also |
35 |
|
|
* includes special-cased support for some ForkJoinTask |
36 |
|
|
* methods. But the main mechanics involve work-stealing: |
37 |
|
|
* |
38 |
|
|
* Work-stealing queues are special forms of Deques that support |
39 |
|
|
* only three of the four possible end-operations -- push, pop, |
40 |
|
|
* and deq (aka steal), under the further constraints that push |
41 |
|
|
* and pop are called only from the owning thread, while deq may |
42 |
|
|
* be called from other threads. (If you are unfamiliar with |
43 |
|
|
* them, you probably want to read Herlihy and Shavit's book "The |
44 |
|
|
* Art of Multiprocessor Programming", chapter 16 describing these |
45 |
|
|
* in more detail before proceeding.) The main work-stealing |
46 |
|
|
* queue design is roughly similar to those in the papers "Dynamic |
47 |
|
|
* Circular Work-Stealing Deque" by Chase and Lev, SPAA 2005 |
48 |
|
|
* (http://research.sun.com/scalable/pubs/index.html) and |
49 |
|
|
* "Idempotent work stealing" by Michael, Saraswat, and Vechev, |
50 |
|
|
* PPoPP 2009 (http://portal.acm.org/citation.cfm?id=1504186). |
51 |
|
|
* The main differences ultimately stem from gc requirements that |
52 |
|
|
* we null out taken slots as soon as we can, to maintain as small |
53 |
|
|
* a footprint as possible even in programs generating huge |
54 |
|
|
* numbers of tasks. To accomplish this, we shift the CAS |
55 |
|
|
* arbitrating pop vs deq (steal) from being on the indices |
56 |
|
|
* ("base" and "sp") to the slots themselves (mainly via method |
57 |
|
|
* "casSlotNull()"). So, both a successful pop and deq mainly |
58 |
|
|
* entail a CAS of a slot from non-null to null. Because we rely |
59 |
|
|
* on CASes of references, we do not need tag bits on base or sp. |
60 |
|
|
* They are simple ints as used in any circular array-based queue |
61 |
|
|
* (see for example ArrayDeque). Updates to the indices must |
62 |
|
|
* still be ordered in a way that guarantees that sp == base means |
63 |
|
|
* the queue is empty, but otherwise may err on the side of |
64 |
|
|
* possibly making the queue appear nonempty when a push, pop, or |
65 |
|
|
* deq have not fully committed. Note that this means that the deq |
66 |
|
|
* operation, considered individually, is not wait-free. One thief |
67 |
|
|
* cannot successfully continue until another in-progress one (or, |
68 |
|
|
* if previously empty, a push) completes. However, in the |
69 |
|
|
* aggregate, we ensure at least probabilistic non-blockingness. |
70 |
|
|
* If an attempted steal fails, a thief always chooses a different |
71 |
|
|
* random victim target to try next. So, in order for one thief to |
72 |
|
|
* progress, it suffices for any in-progress deq or new push on |
73 |
|
|
* any empty queue to complete. One reason this works well here is |
74 |
|
|
* that apparently-nonempty often means soon-to-be-stealable, |
75 |
|
|
* which gives threads a chance to set activation status if |
76 |
|
|
* necessary before stealing. |
77 |
jsr166 |
1.1 |
* |
78 |
jsr166 |
1.6 |
* This approach also enables support for "async mode" where local |
79 |
|
|
* task processing is in FIFO, not LIFO order; simply by using a |
80 |
|
|
* version of deq rather than pop when locallyFifo is true (as set |
81 |
|
|
* by the ForkJoinPool). This allows use in message-passing |
82 |
|
|
* frameworks in which tasks are never joined. |
83 |
|
|
* |
84 |
jsr166 |
1.1 |
* Efficient implementation of this approach currently relies on |
85 |
|
|
* an uncomfortable amount of "Unsafe" mechanics. To maintain |
86 |
|
|
* correct orderings, reads and writes of variable base require |
87 |
dl |
1.14 |
* volatile ordering. Variable sp does not require volatile |
88 |
|
|
* writes but still needs store-ordering, which we accomplish by |
89 |
|
|
* pre-incrementing sp before filling the slot with an ordered |
90 |
|
|
* store. (Pre-incrementing also enables backouts used in |
91 |
|
|
* scanWhileJoining.) Because they are protected by volatile base |
92 |
|
|
* reads, reads of the queue array and its slots by other threads |
93 |
|
|
* do not need volatile load semantics, but writes (in push) |
94 |
|
|
* require store order and CASes (in pop and deq) require |
95 |
|
|
* (volatile) CAS semantics. (Michael, Saraswat, and Vechev's |
96 |
|
|
* algorithm has similar properties, but without support for |
97 |
|
|
* nulling slots.) Since these combinations aren't supported |
98 |
|
|
* using ordinary volatiles, the only way to accomplish these |
99 |
jsr166 |
1.8 |
* efficiently is to use direct Unsafe calls. (Using external |
100 |
jsr166 |
1.1 |
* AtomicIntegers and AtomicReferenceArrays for the indices and |
101 |
|
|
* array is significantly slower because of memory locality and |
102 |
jsr166 |
1.8 |
* indirection effects.) |
103 |
jsr166 |
1.9 |
* |
104 |
jsr166 |
1.8 |
* Further, performance on most platforms is very sensitive to |
105 |
|
|
* placement and sizing of the (resizable) queue array. Even |
106 |
|
|
* though these queues don't usually become all that big, the |
107 |
|
|
* initial size must be large enough to counteract cache |
108 |
jsr166 |
1.1 |
* contention effects across multiple queues (especially in the |
109 |
|
|
* presence of GC cardmarking). Also, to improve thread-locality, |
110 |
dl |
1.14 |
* queues are initialized after starting. All together, these |
111 |
|
|
* low-level implementation choices produce as much as a factor of |
112 |
|
|
* 4 performance improvement compared to naive implementations, |
113 |
|
|
* and enable the processing of billions of tasks per second, |
114 |
|
|
* sometimes at the expense of ugliness. |
115 |
jsr166 |
1.1 |
*/ |
116 |
|
|
|
117 |
|
|
/** |
118 |
dl |
1.14 |
* Generator for initial random seeds for random victim |
119 |
|
|
* selection. This is used only to create initial seeds. Random |
120 |
|
|
* steals use a cheaper xorshift generator per steal attempt. We |
121 |
|
|
* expect only rare contention on seedGenerator, so just use a |
122 |
|
|
* plain Random. |
123 |
|
|
*/ |
124 |
|
|
private static final Random seedGenerator = new Random(); |
125 |
|
|
|
126 |
|
|
/** |
127 |
|
|
* The timeout value for suspending spares. Spare workers that |
128 |
|
|
* remain unsignalled for more than this time may be trimmed |
129 |
|
|
* (killed and removed from pool). Since our goal is to avoid |
130 |
|
|
* long-term thread buildup, the exact value of timeout does not |
131 |
|
|
* matter too much so long as it avoids most false-alarm timeouts |
132 |
|
|
* under GC stalls or momentarily high system load. |
133 |
|
|
*/ |
134 |
|
|
private static final long SPARE_KEEPALIVE_NANOS = |
135 |
|
|
5L * 1000L * 1000L * 1000L; // 5 secs |
136 |
|
|
|
137 |
|
|
/** |
138 |
jsr166 |
1.1 |
* Capacity of work-stealing queue array upon initialization. |
139 |
|
|
* Must be a power of two. Initial size must be at least 2, but is |
140 |
|
|
* padded to minimize cache effects. |
141 |
|
|
*/ |
142 |
|
|
private static final int INITIAL_QUEUE_CAPACITY = 1 << 13; |
143 |
|
|
|
144 |
|
|
/** |
145 |
|
|
* Maximum work-stealing queue array size. Must be less than or |
146 |
|
|
* equal to 1 << 28 to ensure lack of index wraparound. (This |
147 |
|
|
* is less than usual bounds, because we need leftshift by 3 |
148 |
|
|
* to be in int range). |
149 |
|
|
*/ |
150 |
|
|
private static final int MAXIMUM_QUEUE_CAPACITY = 1 << 28; |
151 |
|
|
|
152 |
|
|
/** |
153 |
|
|
* The pool this thread works in. Accessed directly by ForkJoinTask. |
154 |
|
|
*/ |
155 |
|
|
final ForkJoinPool pool; |
156 |
|
|
|
157 |
|
|
/** |
158 |
|
|
* The work-stealing queue array. Size must be a power of two. |
159 |
dl |
1.14 |
* Initialized in onStart, to improve memory locality. |
160 |
jsr166 |
1.1 |
*/ |
161 |
|
|
private ForkJoinTask<?>[] queue; |
162 |
|
|
|
163 |
|
|
/** |
164 |
dl |
1.14 |
* Index (mod queue.length) of least valid queue slot, which is |
165 |
|
|
* always the next position to steal from if nonempty. |
166 |
|
|
*/ |
167 |
|
|
private volatile int base; |
168 |
|
|
|
169 |
|
|
/** |
170 |
jsr166 |
1.1 |
* Index (mod queue.length) of next queue slot to push to or pop |
171 |
dl |
1.14 |
* from. It is written only by owner thread, and accessed by other |
172 |
|
|
* threads only after reading (volatile) base. Both sp and base |
173 |
|
|
* are allowed to wrap around on overflow, but (sp - base) still |
174 |
|
|
* estimates size. |
175 |
|
|
*/ |
176 |
|
|
private int sp; |
177 |
jsr166 |
1.1 |
|
178 |
|
|
/** |
179 |
dl |
1.14 |
* Run state of this worker. In addition to the usual run levels, |
180 |
|
|
* tracks if this worker is suspended as a spare, and if it was |
181 |
|
|
* killed (trimmed) while suspended. However, "active" status is |
182 |
|
|
* maintained separately. |
183 |
jsr166 |
1.1 |
*/ |
184 |
dl |
1.14 |
private volatile int runState; |
185 |
|
|
|
186 |
|
|
private static final int TERMINATING = 0x01; |
187 |
|
|
private static final int TERMINATED = 0x02; |
188 |
|
|
private static final int SUSPENDED = 0x04; // inactive spare |
189 |
|
|
private static final int TRIMMED = 0x08; // killed while suspended |
190 |
jsr166 |
1.1 |
|
191 |
|
|
/** |
192 |
dl |
1.14 |
* Number of LockSupport.park calls to block this thread for |
193 |
|
|
* suspension or event waits. Used for internal instrumentation; |
194 |
|
|
* currently not exported but included because volatile write upon |
195 |
|
|
* park also provides a workaround for a JVM bug. |
196 |
jsr166 |
1.1 |
*/ |
197 |
dl |
1.14 |
private volatile int parkCount; |
198 |
jsr166 |
1.1 |
|
199 |
|
|
/** |
200 |
dl |
1.14 |
* Number of steals, transferred and reset in pool callbacks |
201 |
|
|
* when idle. Accessed directly by pool. |
202 |
jsr166 |
1.1 |
*/ |
203 |
dl |
1.14 |
int stealCount; |
204 |
jsr166 |
1.1 |
|
205 |
|
|
/** |
206 |
|
|
* Seed for random number generator for choosing steal victims. |
207 |
dl |
1.14 |
* Uses Marsaglia xorshift. Must be initialized as nonzero. |
208 |
jsr166 |
1.1 |
*/ |
209 |
|
|
private int seed; |
210 |
|
|
|
211 |
|
|
/** |
212 |
dl |
1.14 |
* Activity status. When true, this worker is considered active. |
213 |
|
|
* Accessed directly by pool. Must be false upon construction. |
214 |
|
|
*/ |
215 |
|
|
boolean active; |
216 |
|
|
|
217 |
|
|
/** |
218 |
|
|
* True if use local fifo, not default lifo, for local polling. |
219 |
|
|
* Shadows value from ForkJoinPool, which resets it if changed |
220 |
|
|
* pool-wide. |
221 |
jsr166 |
1.1 |
*/ |
222 |
dl |
1.14 |
private boolean locallyFifo; |
223 |
jsr166 |
1.1 |
|
224 |
|
|
/** |
225 |
|
|
* Index of this worker in pool array. Set once by pool before |
226 |
dl |
1.14 |
* running, and accessed directly by pool to locate this worker in |
227 |
|
|
* its workers array. |
228 |
jsr166 |
1.1 |
*/ |
229 |
|
|
int poolIndex; |
230 |
|
|
|
231 |
|
|
/** |
232 |
dl |
1.14 |
* The last pool event waited for. Accessed only by pool in |
233 |
|
|
* callback methods invoked within this thread. |
234 |
jsr166 |
1.1 |
*/ |
235 |
dl |
1.14 |
int lastEventCount; |
236 |
jsr166 |
1.1 |
|
237 |
|
|
/** |
238 |
dl |
1.14 |
* Encoded index and event count of next event waiter. Used only |
239 |
|
|
* by ForkJoinPool for managing event waiters. |
240 |
jsr166 |
1.1 |
*/ |
241 |
dl |
1.14 |
volatile long nextWaiter; |
242 |
jsr166 |
1.1 |
|
243 |
|
|
/**
 * Creates a ForkJoinWorkerThread operating in the given pool.
 *
 * @param pool the pool this thread works in
 * @throws NullPointerException if pool is null
 */
protected ForkJoinWorkerThread(ForkJoinPool pool) {
    if (pool == null) {
        throw new NullPointerException();
    }
    this.pool = pool;
    // Nothing else is set up here: the rest of the initialization is
    // deferred to start() and onStart() so that construction details
    // are not exposed to subclasses.
}
255 |
|
|
|
256 |
dl |
1.14 |
/** |
257 |
|
|
* Performs additional initialization and starts this thread |
258 |
|
|
*/ |
259 |
|
|
final void start(int poolIndex, boolean locallyFifo, |
260 |
|
|
UncaughtExceptionHandler ueh) { |
261 |
|
|
this.poolIndex = poolIndex; |
262 |
|
|
this.locallyFifo = locallyFifo; |
263 |
|
|
if (ueh != null) |
264 |
|
|
setUncaughtExceptionHandler(ueh); |
265 |
|
|
setDaemon(true); |
266 |
|
|
start(); |
267 |
|
|
} |
268 |
|
|
|
269 |
|
|
// Public/protected methods |
270 |
jsr166 |
1.1 |
|
271 |
|
|
/** |
272 |
|
|
* Returns the pool hosting this thread. |
273 |
|
|
* |
274 |
|
|
* @return the pool |
275 |
|
|
*/ |
276 |
|
|
public ForkJoinPool getPool() { |
277 |
|
|
return pool; |
278 |
|
|
} |
279 |
|
|
|
280 |
|
|
/** |
281 |
|
|
* Returns the index number of this thread in its pool. The |
282 |
|
|
* returned value ranges from zero to the maximum number of |
283 |
|
|
* threads (minus one) that have ever been created in the pool. |
284 |
|
|
* This method may be useful for applications that track status or |
285 |
|
|
* collect results per-worker rather than per-task. |
286 |
|
|
* |
287 |
|
|
* @return the index number |
288 |
|
|
*/ |
289 |
|
|
public int getPoolIndex() { |
290 |
|
|
return poolIndex; |
291 |
|
|
} |
292 |
|
|
|
293 |
|
|
/** |
294 |
dl |
1.14 |
* Initializes internal state after construction but before |
295 |
|
|
* processing any tasks. If you override this method, you must |
296 |
|
|
* invoke super.onStart() at the beginning of the method. |
297 |
|
|
* Initialization requires care: Most fields must have legal |
298 |
|
|
* default values, to ensure that attempted accesses from other |
299 |
|
|
* threads work correctly even before this thread starts |
300 |
|
|
* processing tasks. |
301 |
jsr166 |
1.1 |
*/ |
302 |
dl |
1.14 |
protected void onStart() { |
303 |
|
|
int rs = seedGenerator.nextInt(); |
304 |
|
|
seed = rs == 0? 1 : rs; // seed must be nonzero |
305 |
jsr166 |
1.1 |
|
306 |
dl |
1.14 |
// Allocate name string and queue array in this thread |
307 |
|
|
String pid = Integer.toString(pool.getPoolNumber()); |
308 |
|
|
String wid = Integer.toString(poolIndex); |
309 |
|
|
setName("ForkJoinPool-" + pid + "-worker-" + wid); |
310 |
jsr166 |
1.1 |
|
311 |
dl |
1.14 |
queue = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY]; |
312 |
|
|
} |
313 |
jsr166 |
1.1 |
|
314 |
|
|
/** |
315 |
dl |
1.14 |
* Performs cleanup associated with termination of this worker |
316 |
|
|
* thread. If you override this method, you must invoke |
317 |
|
|
* {@code super.onTermination} at the end of the overridden method. |
318 |
jsr166 |
1.4 |
* |
319 |
dl |
1.14 |
* @param exception the exception causing this thread to abort due |
320 |
|
|
* to an unrecoverable error, or {@code null} if completed normally |
321 |
jsr166 |
1.1 |
*/ |
322 |
dl |
1.14 |
protected void onTermination(Throwable exception) { |
323 |
|
|
try { |
324 |
|
|
cancelTasks(); |
325 |
|
|
setTerminated(); |
326 |
|
|
pool.workerTerminated(this); |
327 |
|
|
} catch (Throwable ex) { // Shouldn't ever happen |
328 |
|
|
if (exception == null) // but if so, at least rethrown |
329 |
|
|
exception = ex; |
330 |
|
|
} finally { |
331 |
|
|
if (exception != null) |
332 |
|
|
UNSAFE.throwException(exception); |
333 |
jsr166 |
1.1 |
} |
334 |
|
|
} |
335 |
|
|
|
336 |
|
|
/** |
337 |
|
|
* This method is required to be public, but should never be |
338 |
|
|
* called explicitly. It performs the main run loop to execute |
339 |
|
|
* ForkJoinTasks. |
340 |
|
|
*/ |
341 |
|
|
public void run() { |
342 |
|
|
Throwable exception = null; |
343 |
|
|
try { |
344 |
|
|
onStart(); |
345 |
|
|
mainLoop(); |
346 |
|
|
} catch (Throwable ex) { |
347 |
|
|
exception = ex; |
348 |
|
|
} finally { |
349 |
|
|
onTermination(exception); |
350 |
|
|
} |
351 |
|
|
} |
352 |
|
|
|
353 |
dl |
1.14 |
// helpers for run() |
354 |
|
|
|
355 |
jsr166 |
1.1 |
/** |
356 |
dl |
1.14 |
* Find and execute tasks and check status while running |
357 |
jsr166 |
1.1 |
*/ |
358 |
|
|
private void mainLoop() { |
359 |
dl |
1.14 |
boolean ran = false; // true if ran task on previous step |
360 |
|
|
ForkJoinPool p = pool; |
361 |
|
|
for (;;) { |
362 |
|
|
p.preStep(this, ran); |
363 |
|
|
if (runState != 0) |
364 |
|
|
return; |
365 |
|
|
ForkJoinTask<?> t; // try to get and run stolen or submitted task |
366 |
|
|
if (ran = (t = scan()) != null || (t = pollSubmission()) != null) { |
367 |
|
|
t.tryExec(); |
368 |
|
|
if (base != sp) |
369 |
|
|
runLocalTasks(); |
370 |
|
|
} |
371 |
jsr166 |
1.1 |
} |
372 |
|
|
} |
373 |
|
|
|
374 |
|
|
/** |
375 |
dl |
1.14 |
* Runs local tasks until queue is empty or shut down. Call only |
376 |
|
|
* while active. |
377 |
jsr166 |
1.1 |
*/ |
378 |
dl |
1.14 |
private void runLocalTasks() { |
379 |
|
|
while (runState == 0) { |
380 |
|
|
ForkJoinTask<?> t = locallyFifo? locallyDeqTask() : popTask(); |
381 |
|
|
if (t != null) |
382 |
|
|
t.tryExec(); |
383 |
|
|
else if (base == sp) |
384 |
|
|
break; |
385 |
|
|
} |
386 |
jsr166 |
1.1 |
} |
387 |
|
|
|
388 |
|
|
/** |
389 |
dl |
1.14 |
* If a submission exists, try to activate and take it |
390 |
jsr166 |
1.1 |
* |
391 |
dl |
1.14 |
* @return a task, if available |
392 |
jsr166 |
1.1 |
*/ |
393 |
dl |
1.14 |
private ForkJoinTask<?> pollSubmission() { |
394 |
|
|
ForkJoinPool p = pool; |
395 |
|
|
while (p.hasQueuedSubmissions()) { |
396 |
|
|
if (active || (active = p.tryIncrementActiveCount())) { |
397 |
|
|
ForkJoinTask<?> t = p.pollSubmission(); |
398 |
|
|
return t != null ? t : scan(); // if missed, rescan |
399 |
jsr166 |
1.1 |
} |
400 |
|
|
} |
401 |
dl |
1.14 |
return null; |
402 |
jsr166 |
1.1 |
} |
403 |
|
|
|
404 |
dl |
1.14 |
/* |
405 |
|
|
* Intrinsics-based atomic writes for queue slots. These are |
406 |
|
|
* basically the same as methods in AtomicReferenceArray, but |
407 |
|
|
* specialized for (1) ForkJoinTask elements (2) requirement that |
408 |
|
|
* nullness and bounds checks have already been performed by |
409 |
|
|
* callers and (3) effective offsets are known not to overflow |
410 |
|
|
* from int to long (because of MAXIMUM_QUEUE_CAPACITY). We don't |
411 |
|
|
* need corresponding version for reads: plain array reads are OK |
412 |
|
|
* because they are protected by other volatile reads and are |
413 |
|
|
* confirmed by CASes. |
414 |
|
|
* |
415 |
|
|
* Most uses don't actually call these methods, but instead contain |
416 |
|
|
* inlined forms that enable more predictable optimization. We |
417 |
|
|
* don't define the version of write used in pushTask at all, but |
418 |
|
|
* instead inline there a store-fenced array slot write. |
419 |
jsr166 |
1.1 |
*/ |
420 |
|
|
|
421 |
|
|
/** |
422 |
dl |
1.14 |
* CASes slot i of array q from t to null. Caller must ensure q is |
423 |
|
|
* non-null and index is in range. |
424 |
jsr166 |
1.1 |
*/ |
425 |
dl |
1.14 |
private static final boolean casSlotNull(ForkJoinTask<?>[] q, int i, |
426 |
|
|
ForkJoinTask<?> t) { |
427 |
|
|
return UNSAFE.compareAndSwapObject(q, (i << qShift) + qBase, t, null); |
428 |
jsr166 |
1.1 |
} |
429 |
|
|
|
430 |
|
|
/** |
431 |
dl |
1.14 |
* Performs a volatile write of the given task at given slot of |
432 |
|
|
* array q. Caller must ensure q is non-null and index is in |
433 |
|
|
* range. This method is used only during resets and backouts. |
434 |
jsr166 |
1.1 |
*/ |
435 |
dl |
1.14 |
private static final void writeSlot(ForkJoinTask<?>[] q, int i, |
436 |
|
|
ForkJoinTask<?> t) { |
437 |
|
|
UNSAFE.putObjectVolatile(q, (i << qShift) + qBase, t); |
438 |
jsr166 |
1.1 |
} |
439 |
|
|
|
440 |
dl |
1.14 |
// queue methods |
441 |
jsr166 |
1.1 |
|
442 |
|
|
/**
 * Pushes a task. Call only from this thread.
 *
 * <p>Increments {@code sp}, stores the task into the slot at the
 * old {@code sp} with an ordered write, then either signals the pool
 * (if the queue appeared empty before the push) or grows the array
 * (if it is now nearly full).
 *
 * @param t the task. Caller must ensure non-null.
 */
final void pushTask(ForkJoinTask<?> t) {
    int s;
    ForkJoinTask<?>[] q = queue;
    int mask = q.length - 1; // implicit assert q != null
    // s receives the value of sp before the increment; the task goes
    // into slot (s & mask) via an ordered (store-fenced) write.
    UNSAFE.putOrderedObject(q, (((s = sp++) & mask) << qShift) + qBase, t);
    // After subtracting base, s is the apparent number of tasks that
    // were queued before this push.
    if ((s -= base) <= 0)
        pool.signalWork();       // queue looked empty: signal the pool
    else if (s + 1 >= mask)
        growQueue();             // count including this task reached mask
}
457 |
|
|
|
458 |
|
|
/**
 * Tries to take a task from the base of the queue, failing if
 * empty or contended. Note: Specializations of this code appear
 * in scan and scanWhileJoining.
 *
 * <p>Makes a single attempt: the slot at {@code base} is claimed by
 * CASing it from the observed task to null, and only on success is
 * {@code base} advanced.
 *
 * @return a task, or null if none or contended
 */
final ForkJoinTask<?> deqTask() {
    ForkJoinTask<?> t;
    ForkJoinTask<?>[] q;
    int b, i;
    if ((b = base) != sp &&                   // volatile read of base first
        (q = queue) != null && // must read q after b
        (t = q[i = (q.length - 1) & b]) != null &&   // slot not yet taken
        UNSAFE.compareAndSwapObject(q, (i << qShift) + qBase, t, null)) {
        base = b + 1;          // slot claimed; publish the new base
        return t;
    }
    return null;
}
478 |
|
|
|
479 |
|
|
/**
 * Tries to take a task from the base of own queue. Assumes active
 * status.  Called only by current thread.
 *
 * <p>Unlike {@code deqTask}, this keeps retrying while
 * {@code sp != base}: it gives up only once the queue appears empty.
 *
 * @return a task, or null if none
 */
final ForkJoinTask<?> locallyDeqTask() {
    ForkJoinTask<?>[] q = queue;
    if (q != null) {
        ForkJoinTask<?> t;
        int b, i;
        // Re-read base on every pass; a null slot or failed CAS just
        // causes another iteration.
        while (sp != (b = base)) {
            if ((t = q[i = (q.length - 1) & b]) != null &&
                UNSAFE.compareAndSwapObject(q, (i << qShift) + qBase,
                                            t, null)) {
                base = b + 1;  // slot claimed; publish the new base
                return t;
            }
        }
    }
    return null;
}
501 |
|
|
|
502 |
|
|
/**
 * Returns a popped task, or null if empty. Assumes active status.
 * Called only by current thread. (Note: a specialization of this
 * code appears in scanWhileJoining.)
 *
 * <p>Makes a single attempt on the slot just below {@code sp}; also
 * returns null if that slot is already null or the CAS on it fails.
 *
 * @return the task taken from the top of the queue, or null
 */
final ForkJoinTask<?> popTask() {
    int s;
    ForkJoinTask<?>[] q = queue;
    if (q != null && (s = sp) != base) {
        int i = (q.length - 1) & --s;   // index of the topmost slot
        ForkJoinTask<?> t = q[i];
        if (t != null && UNSAFE.compareAndSwapObject
            (q, (i << qShift) + qBase, t, null)) {
            sp = s;                     // commit the pop only after winning the CAS
            return t;
        }
    }
    return null;
}
521 |
|
|
|
522 |
|
|
/**
 * Specialized version of popTask to pop only if
 * topmost element is the given task. Called only
 * by current thread while active.
 *
 * @param t the task. Caller must ensure non-null.
 * @return true if the topmost slot held t and it was removed
 */
final boolean unpushTask(ForkJoinTask<?> t) {
    int s;
    ForkJoinTask<?>[] q = queue;
    // CAS the slot at (sp - 1) from t to null; this fails if the slot
    // holds anything other than t.
    if (q != null && UNSAFE.compareAndSwapObject
        (q, (((q.length - 1) & (s = sp - 1)) << qShift) + qBase, t, null)){
        sp = s;   // commit the pop
        return true;
    }
    return false;
}
539 |
|
|
|
540 |
|
|
/** |
541 |
jsr166 |
1.6 |
* Returns next task or null if empty or contended |
542 |
jsr166 |
1.1 |
*/ |
543 |
|
|
final ForkJoinTask<?> peekTask() { |
544 |
|
|
ForkJoinTask<?>[] q = queue; |
545 |
|
|
if (q == null) |
546 |
|
|
return null; |
547 |
|
|
int mask = q.length - 1; |
548 |
|
|
int i = locallyFifo ? base : (sp - 1); |
549 |
|
|
return q[i & mask]; |
550 |
|
|
} |
551 |
|
|
|
552 |
|
|
/**
 * Doubles queue array size. Transfers elements by emulating
 * steals (deqs) from old array and placing, oldest first, into
 * new array.
 *
 * @throws RejectedExecutionException if the doubled size would
 *         exceed MAXIMUM_QUEUE_CAPACITY
 */
private void growQueue() {
    ForkJoinTask<?>[] oldQ = queue;
    int oldSize = oldQ.length;
    int newSize = oldSize << 1;
    if (newSize > MAXIMUM_QUEUE_CAPACITY)
        throw new RejectedExecutionException("Queue capacity exceeded");
    // The new array is installed in the queue field before any
    // element is copied into it.
    ForkJoinTask<?>[] newQ = queue = new ForkJoinTask<?>[newSize];

    int b = base;
    int bf = b + oldSize;        // loop bound: visits each old slot once
    int oldMask = oldSize - 1;
    int newMask = newSize - 1;
    do {
        int oldIndex = b & oldMask;
        ForkJoinTask<?> t = oldQ[oldIndex];
        // Claim the task with the same CAS used for steals; if the
        // CAS fails, the corresponding new slot is written as null.
        if (t != null && !casSlotNull(oldQ, oldIndex, t))
            t = null;
        writeSlot(newQ, b & newMask, t);
    } while (++b != bf);
    pool.signalWork();
}
578 |
|
|
|
579 |
|
|
/** |
580 |
dl |
1.14 |
* Computes next value for random victim probe in scan(). Scans |
581 |
|
|
* don't require a very high quality generator, but also not a |
582 |
|
|
* crummy one. Marsaglia xor-shift is cheap and works well enough. |
583 |
|
|
* Note: This is manually inlined in scan() |
584 |
|
|
*/ |
585 |
|
|
private static final int xorShift(int r) { |
586 |
|
|
r ^= r << 13; |
587 |
|
|
r ^= r >>> 17; |
588 |
|
|
return r ^ (r << 5); |
589 |
|
|
} |
590 |
|
|
|
591 |
|
|
/**
 * Tries to steal a task from another worker. Starts at a random
 * index of workers array, and probes workers until finding one
 * with non-empty queue or finding that all are empty.  It
 * randomly selects the first n probes. If these are empty, it
 * resorts to a circular sweep, which is necessary to accurately
 * set active status. (The circular sweep uses steps of
 * approximately half the array size plus 1, to avoid bias
 * stemming from leftmost packing of the array in ForkJoinPool.)
 *
 * This method must be both fast and quiet -- usually avoiding
 * memory accesses that could disrupt cache sharing etc other than
 * those needed to check for and take tasks (or to activate if not
 * already active). This accounts for, among other things,
 * updating random seed in place without storing it until exit.
 *
 * @return a task, or null if none found
 */
private ForkJoinTask<?> scan() {
    ForkJoinPool p = pool;
    ForkJoinWorkerThread[] ws = p.workers;
    int n = ws.length;            // upper bound of #workers
    boolean canSteal = active;    // shadow active status
    int r = seed;                 // extract seed once
    int k = r;                    // index: random if j<0 else step
    // j runs from -n to n-1: negative j are the random probes,
    // non-negative j are the stepped circular sweep.
    for (int j = -n; j < n; ++j) {
        // NOTE(review): masking with (n - 1) presumably relies on the
        // workers array length being a power of two -- confirm in ForkJoinPool.
        ForkJoinWorkerThread v = ws[k & (n - 1)];
        r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // xorshift
        if (v != null && v.base != v.sp) {
            if (canSteal ||       // ensure active status
                (canSteal = active = p.tryIncrementActiveCount())) {
                int b, i;         // inlined specialization of deqTask
                ForkJoinTask<?> t;
                ForkJoinTask<?>[] q;
                if ((b = v.base) != v.sp &&  // recheck
                    (q = v.queue) != null &&
                    (t = q[i = (q.length - 1) & b]) != null &&
                    UNSAFE.compareAndSwapObject
                    (q, (i << qShift) + qBase, t, null)) {
                    v.base = b + 1;
                    seed = r;     // store the advanced seed only on success
                    ++stealCount;
                    return t;
                }
            }
            // Reached whenever the victim looked non-empty but no task
            // was returned (activation failed or the take did not succeed).
            j = -n;               // reset on contention
        }
        // Sweep phase steps by (n/2 | 1); random phase uses the new r.
        k = j >= 0? k + ((n >>> 1) | 1) : r;
    }
    return null;
}
642 |
|
|
|
643 |
dl |
1.14 |
// Run State management |
644 |
|
|
|
645 |
|
|
// status check methods used mainly by ForkJoinPool |
646 |
|
|
final boolean isTerminating() { return (runState & TERMINATING) != 0; } |
647 |
|
|
final boolean isTerminated() { return (runState & TERMINATED) != 0; } |
648 |
|
|
final boolean isSuspended() { return (runState & SUSPENDED) != 0; } |
649 |
|
|
final boolean isTrimmed() { return (runState & TRIMMED) != 0; } |
650 |
|
|
|
651 |
jsr166 |
1.1 |
/** |
652 |
dl |
1.14 |
* Sets state to TERMINATING, also resuming if suspended. |
653 |
|
|
*/ |
654 |
|
|
final void shutdown() { |
655 |
|
|
for (;;) { |
656 |
|
|
int s = runState; |
657 |
|
|
if ((s & SUSPENDED) != 0) { // kill and wakeup if suspended |
658 |
|
|
if (UNSAFE.compareAndSwapInt(this, runStateOffset, s, |
659 |
|
|
(s & ~SUSPENDED) | |
660 |
|
|
(TRIMMED|TERMINATING))) { |
661 |
|
|
LockSupport.unpark(this); |
662 |
|
|
break; |
663 |
|
|
} |
664 |
|
|
} |
665 |
|
|
else if (UNSAFE.compareAndSwapInt(this, runStateOffset, s, |
666 |
|
|
s | TERMINATING)) |
667 |
|
|
break; |
668 |
|
|
} |
669 |
|
|
} |
670 |
|
|
|
671 |
|
|
/** |
672 |
|
|
* Sets state to TERMINATED. Called only by this thread. |
673 |
|
|
*/ |
674 |
|
|
private void setTerminated() { |
675 |
|
|
int s; |
676 |
|
|
do {} while (!UNSAFE.compareAndSwapInt(this, runStateOffset, |
677 |
|
|
s = runState, |
678 |
|
|
s | (TERMINATING|TERMINATED))); |
679 |
|
|
} |
680 |
|
|
|
681 |
|
|
/** |
682 |
|
|
* Instrumented version of park. Also used by ForkJoinPool.awaitEvent |
683 |
jsr166 |
1.1 |
*/ |
684 |
dl |
1.14 |
final void doPark() { |
685 |
|
|
++parkCount; |
686 |
|
|
LockSupport.park(this); |
687 |
jsr166 |
1.1 |
} |
688 |
|
|
|
689 |
|
|
/** |
690 |
dl |
1.14 |
* If suspended, tries to set status to unsuspended. |
691 |
|
|
* Caller must unpark to actually resume |
692 |
jsr166 |
1.1 |
* |
693 |
dl |
1.14 |
* @return true if successful |
694 |
jsr166 |
1.1 |
*/ |
695 |
dl |
1.14 |
final boolean tryUnsuspend() { |
696 |
|
|
int s; |
697 |
|
|
return (((s = runState) & SUSPENDED) != 0 && |
698 |
|
|
UNSAFE.compareAndSwapInt(this, runStateOffset, s, |
699 |
|
|
s & ~SUSPENDED)); |
700 |
jsr166 |
1.1 |
} |
701 |
|
|
|
702 |
|
|
/** |
703 |
dl |
1.14 |
* Sets suspended status and blocks as spare until resumed, |
704 |
|
|
* shutdown, or timed out. |
705 |
jsr166 |
1.1 |
* |
706 |
dl |
1.14 |
* @return false if trimmed |
707 |
jsr166 |
1.1 |
*/ |
708 |
dl |
1.14 |
final boolean suspendAsSpare() { |
709 |
|
|
for (;;) { // set suspended unless terminating |
710 |
|
|
int s = runState; |
711 |
|
|
if ((s & TERMINATING) != 0) { // must kill |
712 |
|
|
if (UNSAFE.compareAndSwapInt(this, runStateOffset, s, |
713 |
|
|
s | (TRIMMED | TERMINATING))) |
714 |
|
|
return false; |
715 |
|
|
} |
716 |
|
|
else if (UNSAFE.compareAndSwapInt(this, runStateOffset, s, |
717 |
|
|
s | SUSPENDED)) |
718 |
|
|
break; |
719 |
|
|
} |
720 |
|
|
lastEventCount = 0; // reset upon resume |
721 |
jsr166 |
1.1 |
ForkJoinPool p = pool; |
722 |
dl |
1.14 |
p.releaseWaiters(); // help others progress |
723 |
|
|
p.accumulateStealCount(this); |
724 |
|
|
interrupted(); // clear/ignore interrupts |
725 |
|
|
if (poolIndex < p.getParallelism()) { // untimed wait |
726 |
|
|
while ((runState & SUSPENDED) != 0) |
727 |
|
|
doPark(); |
728 |
|
|
return true; |
729 |
|
|
} |
730 |
|
|
return timedSuspend(); // timed wait if apparently non-core |
731 |
|
|
} |
732 |
|
|
|
733 |
|
|
/** |
734 |
|
|
* Blocks as spare until resumed or timed out |
735 |
|
|
* @return false if trimmed |
736 |
|
|
*/ |
737 |
|
|
private boolean timedSuspend() { |
738 |
|
|
long nanos = SPARE_KEEPALIVE_NANOS; |
739 |
|
|
long startTime = System.nanoTime(); |
740 |
|
|
while ((runState & SUSPENDED) != 0) { |
741 |
|
|
++parkCount; |
742 |
|
|
if ((nanos -= (System.nanoTime() - startTime)) > 0) |
743 |
|
|
LockSupport.parkNanos(this, nanos); |
744 |
|
|
else { // try to trim on timeout |
745 |
|
|
int s = runState; |
746 |
|
|
if (UNSAFE.compareAndSwapInt(this, runStateOffset, s, |
747 |
|
|
(s & ~SUSPENDED) | |
748 |
|
|
(TRIMMED|TERMINATING))) |
749 |
|
|
return false; |
750 |
|
|
} |
751 |
jsr166 |
1.1 |
} |
752 |
dl |
1.14 |
return true; |
753 |
|
|
} |
754 |
|
|
|
755 |
|
|
// Misc support methods for ForkJoinPool |
756 |
|
|
|
757 |
|
|
/** |
758 |
|
|
* Returns an estimate of the number of tasks in the queue. Also |
759 |
|
|
* used by ForkJoinTask. |
760 |
|
|
*/ |
761 |
|
|
final int getQueueSize() { |
762 |
|
|
return -base + sp; |
763 |
jsr166 |
1.1 |
} |
764 |
|
|
|
765 |
dl |
1.14 |
/** |
766 |
|
|
* Set locallyFifo mode. Called only by ForkJoinPool |
767 |
|
|
*/ |
768 |
|
|
final void setAsyncMode(boolean async) { |
769 |
|
|
locallyFifo = async; |
770 |
|
|
} |
771 |
jsr166 |
1.1 |
|
772 |
|
|
/** |
773 |
|
|
* Removes and cancels all tasks in queue. Can be called from any |
774 |
|
|
* thread. |
775 |
|
|
*/ |
776 |
|
|
final void cancelTasks() { |
777 |
dl |
1.14 |
while (base != sp) { |
778 |
|
|
ForkJoinTask<?> t = deqTask(); |
779 |
|
|
if (t != null) |
780 |
|
|
t.cancelIgnoringExceptions(); |
781 |
|
|
} |
782 |
jsr166 |
1.1 |
} |
783 |
|
|
|
784 |
|
|
/** |
785 |
|
|
* Drains tasks to given collection c. |
786 |
|
|
* |
787 |
|
|
* @return the number of tasks drained |
788 |
|
|
*/ |
789 |
jsr166 |
1.5 |
final int drainTasksTo(Collection<? super ForkJoinTask<?>> c) { |
790 |
jsr166 |
1.1 |
int n = 0; |
791 |
dl |
1.14 |
while (base != sp) { |
792 |
|
|
ForkJoinTask<?> t = deqTask(); |
793 |
|
|
if (t != null) { |
794 |
|
|
c.add(t); |
795 |
|
|
++n; |
796 |
|
|
} |
797 |
jsr166 |
1.1 |
} |
798 |
|
|
return n; |
799 |
|
|
} |
800 |
|
|
|
801 |
dl |
1.14 |
// Support methods for ForkJoinTask |
802 |
|
|
|
803 |
jsr166 |
1.1 |
/** |
804 |
dl |
1.14 |
* Returns an estimate of the number of tasks, offset by a |
805 |
|
|
* function of number of idle workers. |
806 |
|
|
* |
807 |
|
|
* This method provides a cheap heuristic guide for task |
808 |
|
|
* partitioning when programmers, frameworks, tools, or languages |
809 |
|
|
* have little or no idea about task granularity. In essence by |
810 |
|
|
* offering this method, we ask users only about tradeoffs in |
811 |
|
|
* overhead vs expected throughput and its variance, rather than |
812 |
|
|
* how finely to partition tasks. |
813 |
|
|
* |
814 |
|
|
* In a steady state strict (tree-structured) computation, each |
815 |
|
|
* thread makes available for stealing enough tasks for other |
816 |
|
|
* threads to remain active. Inductively, if all threads play by |
817 |
|
|
* the same rules, each thread should make available only a |
818 |
|
|
* constant number of tasks. |
819 |
|
|
* |
820 |
|
|
* The minimum useful constant is just 1. But using a value of 1 |
821 |
|
|
* would require immediate replenishment upon each steal to |
822 |
|
|
* maintain enough tasks, which is infeasible. Further, |
823 |
|
|
* partitionings/granularities of offered tasks should minimize |
824 |
|
|
* steal rates, which in general means that threads nearer the top |
825 |
|
|
* of computation tree should generate more than those nearer the |
826 |
|
|
* bottom. In perfect steady state, each thread is at |
827 |
|
|
* approximately the same level of computation tree. However, |
828 |
|
|
* producing extra tasks amortizes the uncertainty of progress and |
829 |
|
|
* diffusion assumptions. |
830 |
|
|
* |
831 |
|
|
* So, users will want to use values larger, but not much larger |
832 |
|
|
* than 1 to both smooth over transient shortages and hedge |
833 |
|
|
* against uneven progress; as traded off against the cost of |
834 |
|
|
* extra task overhead. We leave the user to pick a threshold |
835 |
|
|
* value to compare with the results of this call to guide |
836 |
|
|
* decisions, but recommend values such as 3. |
837 |
|
|
* |
838 |
|
|
* When all threads are active, it is on average OK to estimate |
839 |
|
|
* surplus strictly locally. In steady-state, if one thread is |
840 |
|
|
* maintaining say 2 surplus tasks, then so are others. So we can |
841 |
|
|
* just use estimated queue length (although note that (sp - base) |
842 |
|
|
* can be an overestimate because of stealers lagging increments |
843 |
|
|
* of base). However, this strategy alone leads to serious |
844 |
|
|
* mis-estimates in some non-steady-state conditions (ramp-up, |
845 |
|
|
* ramp-down, other stalls). We can detect many of these by |
846 |
|
|
* further considering the number of "idle" threads, that are |
847 |
|
|
* known to have zero queued tasks, so compensate by a factor of |
848 |
|
|
* (#idle/#active) threads. |
849 |
jsr166 |
1.1 |
*/ |
850 |
dl |
1.14 |
final int getEstimatedSurplusTaskCount() { |
851 |
|
|
return sp - base - pool.idlePerActive(); |
852 |
jsr166 |
1.1 |
} |
853 |
|
|
|
854 |
|
|
/** |
855 |
dl |
1.14 |
* Gets and removes a local task. |
856 |
jsr166 |
1.1 |
* |
857 |
dl |
1.14 |
* @return a task, if available |
858 |
jsr166 |
1.1 |
*/ |
859 |
dl |
1.14 |
final ForkJoinTask<?> pollLocalTask() { |
860 |
|
|
while (base != sp) { |
861 |
|
|
if (active || (active = pool.tryIncrementActiveCount())) |
862 |
|
|
return locallyFifo? locallyDeqTask() : popTask(); |
863 |
jsr166 |
1.1 |
} |
864 |
dl |
1.14 |
return null; |
865 |
jsr166 |
1.1 |
} |
866 |
|
|
|
867 |
|
|
/** |
868 |
dl |
1.14 |
* Gets and removes a local or stolen task. |
869 |
|
|
* |
870 |
|
|
* @return a task, if available |
871 |
jsr166 |
1.1 |
*/ |
872 |
dl |
1.14 |
final ForkJoinTask<?> pollTask() { |
873 |
|
|
ForkJoinTask<?> t; |
874 |
|
|
return (t = pollLocalTask()) != null ? t : scan(); |
875 |
jsr166 |
1.1 |
} |
876 |
|
|
|
877 |
|
|
/** |
878 |
dl |
1.15 |
* Returns a popped or stolen task, if available, unless joinMe is done |
879 |
dl |
1.14 |
* |
880 |
|
|
* This method is intrinsically nonmodular. To maintain the |
881 |
|
|
* property that tasks are never stolen if the awaited task is |
882 |
|
|
* ready, we must interleave mechanics of scan with status |
883 |
|
|
* checks. We rely here on the commit points of deq that allow us |
884 |
|
|
* to cancel a steal even after CASing slot to null, but before |
885 |
|
|
* adjusting base index: If, after the CAS, we see that joinMe is |
886 |
|
|
* ready, we can back out by placing the task back into the slot, |
887 |
|
|
* without adjusting index. The scan loop is otherwise the same as |
888 |
|
|
* in scan. |
889 |
|
|
* |
890 |
jsr166 |
1.1 |
*/ |
891 |
|
|
final ForkJoinTask<?> scanWhileJoining(ForkJoinTask<?> joinMe) { |
892 |
dl |
1.15 |
ForkJoinTask<?> popped; // prefer local tasks |
893 |
|
|
if (base != sp && (popped = popWhileJoining(joinMe)) != null) |
894 |
|
|
return popped; |
895 |
|
|
if (joinMe.status >= 0) { |
896 |
|
|
ForkJoinPool p = pool; |
897 |
dl |
1.14 |
ForkJoinWorkerThread[] ws = p.workers; |
898 |
|
|
int n = ws.length; |
899 |
dl |
1.15 |
int r = seed; |
900 |
dl |
1.14 |
int k = r; |
901 |
dl |
1.15 |
for (int j = -n; j < n && joinMe.status >= 0; ++j) { |
902 |
dl |
1.14 |
ForkJoinWorkerThread v = ws[k & (n - 1)]; |
903 |
|
|
r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // xorshift |
904 |
|
|
if (v != null) { |
905 |
|
|
int b = v.base; |
906 |
|
|
ForkJoinTask<?>[] q; |
907 |
|
|
if (b != v.sp && (q = v.queue) != null) { |
908 |
|
|
int i = (q.length - 1) & b; |
909 |
|
|
ForkJoinTask<?> t = q[i]; |
910 |
dl |
1.15 |
if (t != null && UNSAFE.compareAndSwapObject |
911 |
|
|
(q, (i << qShift) + qBase, t, null)) { |
912 |
|
|
if (joinMe.status >= 0) { |
913 |
dl |
1.14 |
v.base = b + 1; |
914 |
|
|
seed = r; |
915 |
|
|
++stealCount; |
916 |
|
|
return t; |
917 |
|
|
} |
918 |
dl |
1.15 |
UNSAFE.putObjectVolatile(q, (i<<qShift)+qBase, t); |
919 |
|
|
break; // back out |
920 |
dl |
1.14 |
} |
921 |
dl |
1.15 |
j = -n; |
922 |
dl |
1.14 |
} |
923 |
|
|
} |
924 |
|
|
k = j >= 0? k + ((n >>> 1) | 1) : r; |
925 |
|
|
} |
926 |
dl |
1.15 |
} |
927 |
|
|
return null; |
928 |
|
|
} |
929 |
|
|
|
930 |
|
|
/** |
931 |
|
|
* Version of popTask with join checks surrounding extraction. |
932 |
|
|
* Uses the same backout strategy as scanWhileJoining. Note that |
933 |
|
|
* we ignore locallyFifo flag for local tasks here since helping |
934 |
|
|
* joins only make sense in LIFO mode. |
935 |
|
|
* |
936 |
|
|
* @return a popped task, if available, unless joinMe is done |
937 |
|
|
*/ |
938 |
|
|
private ForkJoinTask<?> popWhileJoining(ForkJoinTask<?> joinMe) { |
939 |
|
|
int s; |
940 |
|
|
ForkJoinTask<?>[] q; |
941 |
|
|
while ((s = sp) != base && (q = queue) != null && joinMe.status >= 0) { |
942 |
|
|
int i = (q.length - 1) & --s; |
943 |
|
|
ForkJoinTask<?> t = q[i]; |
944 |
|
|
if (t != null && UNSAFE.compareAndSwapObject |
945 |
|
|
(q, (i << qShift) + qBase, t, null)) { |
946 |
|
|
if (joinMe.status >= 0) { |
947 |
|
|
sp = s; |
948 |
|
|
return t; |
949 |
|
|
} |
950 |
|
|
UNSAFE.putObjectVolatile(q, (i << qShift) + qBase, t); |
951 |
|
|
break; // back out |
952 |
|
|
} |
953 |
jsr166 |
1.1 |
} |
954 |
dl |
1.14 |
return null; |
955 |
jsr166 |
1.1 |
} |
956 |
|
|
|
957 |
|
|
/** |
958 |
|
|
* Runs tasks until {@code pool.isQuiescent()}. |
959 |
|
|
*/ |
960 |
|
|
final void helpQuiescePool() { |
961 |
|
|
for (;;) { |
962 |
dl |
1.14 |
ForkJoinTask<?> t = pollLocalTask(); |
963 |
|
|
if (t != null || (t = scan()) != null) |
964 |
|
|
t.tryExec(); |
965 |
|
|
else { |
966 |
|
|
ForkJoinPool p = pool; |
967 |
|
|
if (active) { |
968 |
|
|
active = false; // inactivate |
969 |
|
|
do {} while (!p.tryDecrementActiveCount()); |
970 |
|
|
} |
971 |
|
|
if (p.isQuiescent()) { |
972 |
|
|
active = true; // re-activate |
973 |
|
|
do {} while (!p.tryIncrementActiveCount()); |
974 |
|
|
return; |
975 |
|
|
} |
976 |
|
|
} |
977 |
jsr166 |
1.1 |
} |
978 |
|
|
} |
979 |
|
|
|
980 |
|
|
// Unsafe mechanics |
981 |
|
|
|
982 |
|
|
/** Unsafe instance used for the CAS and volatile-write operations in this class. */
private static final sun.misc.Unsafe UNSAFE = sun.misc.Unsafe.getUnsafe();

/** Offset of the {@code runState} field, for CAS updates. */
private static final long runStateOffset =
    objectFieldOffset("runState", ForkJoinWorkerThread.class);

/** Base offset of the first element of a {@code ForkJoinTask[]}. */
private static final long qBase =
    UNSAFE.arrayBaseOffset(ForkJoinTask[].class);

/** log2 of the {@code ForkJoinTask[]} element scale; slot offset is (index << qShift) + qBase. */
private static final int qShift;

static {
    final int scale = UNSAFE.arrayIndexScale(ForkJoinTask[].class);
    // Shifting by qShift only works when the scale is a power of two.
    if ((scale & (scale - 1)) != 0) {
        throw new Error("data type scale not a power of two");
    }
    qShift = 31 - Integer.numberOfLeadingZeros(scale);
}
995 |
jsr166 |
1.3 |
|
996 |
|
|
private static long objectFieldOffset(String field, Class<?> klazz) { |
997 |
|
|
try { |
998 |
|
|
return UNSAFE.objectFieldOffset(klazz.getDeclaredField(field)); |
999 |
|
|
} catch (NoSuchFieldException e) { |
1000 |
|
|
// Convert Exception to corresponding Error |
1001 |
|
|
NoSuchFieldError error = new NoSuchFieldError(field); |
1002 |
|
|
error.initCause(e); |
1003 |
|
|
throw error; |
1004 |
|
|
} |
1005 |
|
|
} |
1006 |
jsr166 |
1.1 |
} |