ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/jsr166/jsr166/src/main/java/util/concurrent/ForkJoinWorkerThread.java
Revision: 1.13
Committed: Mon Nov 16 04:16:42 2009 UTC (14 years, 6 months ago) by jsr166
Branch: MAIN
Changes since 1.12: +2 -2 lines
Log Message:
whitespace

File Contents

# User Rev Content
1 jsr166 1.1 /*
2     * Written by Doug Lea with assistance from members of JCP JSR-166
3     * Expert Group and released to the public domain, as explained at
4     * http://creativecommons.org/licenses/publicdomain
5     */
6    
7     package java.util.concurrent;
8    
9     import java.util.Collection;
10    
11     /**
12     * A thread managed by a {@link ForkJoinPool}. This class is
13     * subclassable solely for the sake of adding functionality -- there
14 jsr166 1.7 * are no overridable methods dealing with scheduling or execution.
15     * However, you can override initialization and termination methods
16     * surrounding the main task processing loop. If you do create such a
17     * subclass, you will also need to supply a custom {@link
18     * ForkJoinPool.ForkJoinWorkerThreadFactory} to use it in a {@code
19     * ForkJoinPool}.
20 jsr166 1.1 *
21     * @since 1.7
22     * @author Doug Lea
23     */
24     public class ForkJoinWorkerThread extends Thread {
25     /*
26     * Algorithm overview:
27     *
28     * 1. Work-Stealing: Work-stealing queues are special forms of
29     * Deques that support only three of the four possible
30     * end-operations -- push, pop, and deq (aka steal), and only do
31     * so under the constraints that push and pop are called only from
32     * the owning thread, while deq may be called from other threads.
33     * (If you are unfamiliar with them, you probably want to read
34     * Herlihy and Shavit's book "The Art of Multiprocessor
35     * Programming", chapter 16, which describes these in more detail, before
36     * proceeding.) The main work-stealing queue design is roughly
37     * similar to "Dynamic Circular Work-Stealing Deque" by David
38     * Chase and Yossi Lev, SPAA 2005
39     * (http://research.sun.com/scalable/pubs/index.html). The main
40     * difference ultimately stems from gc requirements that we null
41     * out taken slots as soon as we can, to maintain as small a
42     * footprint as possible even in programs generating huge numbers
43     * of tasks. To accomplish this, we shift the CAS arbitrating pop
44     * vs deq (steal) from being on the indices ("base" and "sp") to
45     * the slots themselves (mainly via method "casSlotNull()"). So,
46     * both a successful pop and deq mainly entail CAS'ing a non-null
47     * slot to null. Because we rely on CASes of references, we do
48     * not need tag bits on base or sp. They are simple ints as used
49     * in any circular array-based queue (see for example ArrayDeque).
50     * Updates to the indices must still be ordered in a way that
51     * guarantees that sp == base means the queue is empty, but
52     * otherwise may err on the side of possibly making the queue
53     * appear nonempty when a push, pop, or deq has not fully
54     * committed. Note that this means that the deq operation,
55     * considered individually, is not wait-free. One thief cannot
56     * successfully continue until another in-progress one (or, if
57     * previously empty, a push) completes. However, in the
58 jsr166 1.8 * aggregate, we ensure at least probabilistic
59     * non-blockingness. If an attempted steal fails, a thief always
60     * chooses a different random victim target to try next. So, in
61     * order for one thief to progress, it suffices for any
62     * in-progress deq or new push on any empty queue to complete. One
63     * reason this works well here is that apparently-nonempty often
64     * means soon-to-be-stealable, which gives threads a chance to
65     * activate if necessary before stealing (see below).
66 jsr166 1.1 *
67 jsr166 1.6 * This approach also enables support for "async mode" where local
68     * task processing is in FIFO, not LIFO order; simply by using a
69     * version of deq rather than pop when locallyFifo is true (as set
70     * by the ForkJoinPool). This allows use in message-passing
71     * frameworks in which tasks are never joined.
72     *
73 jsr166 1.1 * Efficient implementation of this approach currently relies on
74     * an uncomfortable amount of "Unsafe" mechanics. To maintain
75     * correct orderings, reads and writes of variable base require
76     * volatile ordering. Variable sp does not require volatile write
77     * but needs cheaper store-ordering on writes. Because they are
78     * protected by volatile base reads, reads of the queue array and
79     * its slots do not need volatile load semantics, but writes (in
80     * push) require store order and CASes (in pop and deq) require
81 jsr166 1.8 * (volatile) CAS semantics. (See "Idempotent work stealing" by
82     * Michael, Saraswat, and Vechev, PPoPP 2009
83     * http://portal.acm.org/citation.cfm?id=1504186 for an algorithm
84     * with similar properties, but without support for nulling
85     * slots.) Since these combinations aren't supported using
86     * ordinary volatiles, the only way to accomplish these
87     * efficiently is to use direct Unsafe calls. (Using external
88 jsr166 1.1 * AtomicIntegers and AtomicReferenceArrays for the indices and
89     * array is significantly slower because of memory locality and
90 jsr166 1.8 * indirection effects.)
91 jsr166 1.9 *
92 jsr166 1.8 * Further, performance on most platforms is very sensitive to
93     * placement and sizing of the (resizable) queue array. Even
94     * though these queues don't usually become all that big, the
95     * initial size must be large enough to counteract cache
96 jsr166 1.1 * contention effects across multiple queues (especially in the
97     * presence of GC cardmarking). Also, to improve thread-locality,
98     * queues are currently initialized immediately after the thread
99     * gets the initial signal to start processing tasks. However,
100     * all queue-related methods except pushTask are written in a way
101     * that allows them to instead be lazily allocated and/or disposed
102     * of when empty. All together, these low-level implementation
103     * choices produce as much as a factor of 4 performance
104     * improvement compared to naive implementations, and enable the
105     * processing of billions of tasks per second, sometimes at the
106     * expense of ugliness.
107     *
108     * 2. Run control: The primary run control is based on a global
109     * counter (activeCount) held by the pool. It uses an algorithm
110     * similar to that in Herlihy and Shavit section 17.6 to cause
111     * threads to eventually block when all threads declare they are
112 jsr166 1.8 * inactive. For this to work, threads must be declared active
113     * when executing tasks, and before stealing a task. They must be
114     * inactive before blocking on the Pool Barrier (awaiting a new
115     * submission or other Pool event). In between, there is some free
116     * play which we take advantage of to avoid contention and rapid
117     * flickering of the global activeCount: If inactive, we activate
118     * only if a victim queue appears to be nonempty (see above).
119     * Similarly, a thread tries to inactivate only after a full scan
120     * of other threads. The net effect is that contention on
121     * activeCount is rarely a measurable performance issue. (There
122     * are also a few other cases where we scan for work rather than
123     * retry/block upon contention.)
124 jsr166 1.1 *
125     * 3. Selection control: We maintain a policy of always choosing to
126     * run local tasks rather than stealing, and always trying to
127     * steal tasks before trying to run a new submission. All steals
128     * are currently performed in randomly-chosen deq-order. It may be
129     * worthwhile to bias these with locality / anti-locality
130     * information, but doing this well probably requires more
131     * lower-level information from JVMs than currently provided.
132     */
133    
134     /**
135     * Capacity of work-stealing queue array upon initialization.
136     * Must be a power of two. Initial size must be at least 2, but is
137     * padded to minimize cache effects.
138     */
139     private static final int INITIAL_QUEUE_CAPACITY = 1 << 13;
140    
141     /**
142     * Maximum work-stealing queue array size. Must be less than or
143     * equal to 1 << 28 to ensure lack of index wraparound. (This
144     * is less than usual bounds, because we need leftshift by 3
145     * to be in int range).
146     */
147     private static final int MAXIMUM_QUEUE_CAPACITY = 1 << 28;
148    
149     /**
150     * The pool this thread works in. Accessed directly by ForkJoinTask.
151     */
152     final ForkJoinPool pool;
153    
154     /**
155     * The work-stealing queue array. Size must be a power of two.
156     * Initialized when thread starts, to improve memory locality.
157     */
158     private ForkJoinTask<?>[] queue;
159    
160     /**
161     * Index (mod queue.length) of next queue slot to push to or pop
162     * from. It is written only by owner thread, via ordered store.
163     * Both sp and base are allowed to wrap around on overflow, but
164     * (sp - base) still estimates size.
165     */
166     private volatile int sp;
167    
168     /**
169     * Index (mod queue.length) of least valid queue slot, which is
170     * always the next position to steal from if nonempty.
171     */
172     private volatile int base;
173    
174     /**
175     * Activity status. When true, this worker is considered active.
176     * Must be false upon construction. It must be true when executing
177     * tasks, and BEFORE stealing a task. It must be false before
178     * calling pool.sync.
179     */
180     private boolean active;
181    
182     /**
183     * Run state of this worker. Supports simple versions of the usual
184     * shutdown/shutdownNow control.
185     */
186     private volatile int runState;
187    
188     /**
189     * Seed for random number generator for choosing steal victims.
190     * Uses Marsaglia xorshift. Must be nonzero upon initialization.
191     */
192     private int seed;
193    
194     /**
195     * Number of steals, transferred to pool when idle
196     */
197     private int stealCount;
198    
199     /**
200     * Index of this worker in pool array. Set once by pool before
201     * running, and accessed directly by pool during cleanup etc.
202     */
203     int poolIndex;
204    
205     /**
206     * The last barrier event waited for. Accessed in pool callback
207     * methods, but only by current thread.
208     */
209     long lastEventCount;
210    
211     /**
212     * True if use local fifo, not default lifo, for local polling
213     */
214     private boolean locallyFifo;
215    
/**
 * Constructs a worker thread that will operate in the supplied pool.
 *
 * @param pool the pool this thread works in
 * @throws NullPointerException if pool is null
 */
protected ForkJoinWorkerThread(ForkJoinPool pool) {
    if (pool == null) {
        throw new NullPointerException();
    }
    // The pool assigns poolIndex during construction; every other
    // piece of state is initialized later, in onStart.
    this.pool = pool;
}
228    
229     // Public access methods
230    
231     /**
232     * Returns the pool hosting this thread.
233     *
234     * @return the pool
235     */
236     public ForkJoinPool getPool() {
237     return pool;
238     }
239    
240     /**
241     * Returns the index number of this thread in its pool. The
242     * returned value ranges from zero to the maximum number of
243     * threads (minus one) that have ever been created in the pool.
244     * This method may be useful for applications that track status or
245     * collect results per-worker rather than per-task.
246     *
247     * @return the index number
248     */
249     public int getPoolIndex() {
250     return poolIndex;
251     }
252    
253     /**
254     * Establishes local first-in-first-out scheduling mode for forked
255     * tasks that are never joined.
256     *
257     * @param async if true, use locally FIFO scheduling
258     */
259     void setAsyncMode(boolean async) {
260     locallyFifo = async;
261     }
262    
263     // Runstate management
264    
265     // Runstate values. Order matters
266     private static final int RUNNING = 0;
267     private static final int SHUTDOWN = 1;
268     private static final int TERMINATING = 2;
269     private static final int TERMINATED = 3;
270    
271     final boolean isShutdown() { return runState >= SHUTDOWN; }
272     final boolean isTerminating() { return runState >= TERMINATING; }
273     final boolean isTerminated() { return runState == TERMINATED; }
274     final boolean shutdown() { return transitionRunStateTo(SHUTDOWN); }
275     final boolean shutdownNow() { return transitionRunStateTo(TERMINATING); }
276    
277     /**
278 jsr166 1.4 * Transitions to at least the given state.
279     *
280     * @return {@code true} if not already at least at given state
281 jsr166 1.1 */
282     private boolean transitionRunStateTo(int state) {
283     for (;;) {
284     int s = runState;
285     if (s >= state)
286     return false;
287     if (UNSAFE.compareAndSwapInt(this, runStateOffset, s, state))
288     return true;
289     }
290     }
291    
292     /**
293     * Tries to set status to active; fails on contention.
294     */
295     private boolean tryActivate() {
296     if (!active) {
297     if (!pool.tryIncrementActiveCount())
298     return false;
299     active = true;
300     }
301     return true;
302     }
303    
304     /**
305     * Tries to set status to inactive; fails on contention.
306     */
307     private boolean tryInactivate() {
308     if (active) {
309     if (!pool.tryDecrementActiveCount())
310     return false;
311     active = false;
312     }
313     return true;
314     }
315    
316     /**
317     * Computes next value for random victim probe. Scans don't
318     * require a very high quality generator, but also not a crummy
319     * one. Marsaglia xor-shift is cheap and works well.
320     */
321     private static int xorShift(int r) {
322 jsr166 1.6 r ^= (r << 13);
323     r ^= (r >>> 17);
324     return r ^ (r << 5);
325 jsr166 1.1 }
326    
327     // Lifecycle methods
328    
329     /**
330     * This method is required to be public, but should never be
331     * called explicitly. It performs the main run loop to execute
332     * ForkJoinTasks.
333     */
334     public void run() {
335     Throwable exception = null;
336     try {
337     onStart();
338     pool.sync(this); // await first pool event
339     mainLoop();
340     } catch (Throwable ex) {
341     exception = ex;
342     } finally {
343     onTermination(exception);
344     }
345     }
346    
347     /**
348     * Executes tasks until shut down.
349     */
350     private void mainLoop() {
351     while (!isShutdown()) {
352     ForkJoinTask<?> t = pollTask();
353     if (t != null || (t = pollSubmission()) != null)
354     t.quietlyExec();
355     else if (tryInactivate())
356     pool.sync(this);
357     }
358     }
359    
360     /**
361     * Initializes internal state after construction but before
362     * processing any tasks. If you override this method, you must
363     * invoke super.onStart() at the beginning of the method.
364     * Initialization requires care: Most fields must have legal
365     * default values, to ensure that attempted accesses from other
366     * threads work correctly even before this thread starts
367     * processing tasks.
368     */
369     protected void onStart() {
370     // Allocate while starting to improve chances of thread-local
371     // isolation
372     queue = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
373     // Initial value of seed need not be especially random but
374     // should differ across workers and must be nonzero
375     int p = poolIndex + 1;
376     seed = p + (p << 8) + (p << 16) + (p << 24); // spread bits
377     }
378    
379     /**
380     * Performs cleanup associated with termination of this worker
381     * thread. If you override this method, you must invoke
382     * {@code super.onTermination} at the end of the overridden method.
383     *
384     * @param exception the exception causing this thread to abort due
385 jsr166 1.4 * to an unrecoverable error, or {@code null} if completed normally
386 jsr166 1.1 */
387     protected void onTermination(Throwable exception) {
388     // Execute remaining local tasks unless aborting or terminating
389 jsr166 1.8 while (exception == null && pool.isProcessingTasks() && base != sp) {
390 jsr166 1.1 try {
391     ForkJoinTask<?> t = popTask();
392     if (t != null)
393     t.quietlyExec();
394     } catch (Throwable ex) {
395     exception = ex;
396     }
397     }
398     // Cancel other tasks, transition status, notify pool, and
399     // propagate exception to uncaught exception handler
400     try {
401     do {} while (!tryInactivate()); // ensure inactive
402     cancelTasks();
403     runState = TERMINATED;
404     pool.workerTerminated(this);
405     } catch (Throwable ex) { // Shouldn't ever happen
406     if (exception == null) // but if so, at least rethrown
407     exception = ex;
408     } finally {
409     if (exception != null)
410     ForkJoinTask.rethrowException(exception);
411     }
412     }
413    
414     // Intrinsics-based support for queue operations.
415    
416 jsr166 1.10 private static long slotOffset(int i) {
417     return ((long) i << qShift) + qBase;
418     }
419    
420 jsr166 1.1 /**
421     * Adds in store-order the given task at given slot of q to null.
422     * Caller must ensure q is non-null and index is in range.
423     */
424     private static void setSlot(ForkJoinTask<?>[] q, int i,
425     ForkJoinTask<?> t) {
426 jsr166 1.10 UNSAFE.putOrderedObject(q, slotOffset(i), t);
427 jsr166 1.1 }
428    
429     /**
430     * CAS given slot of q to null. Caller must ensure q is non-null
431     * and index is in range.
432     */
433     private static boolean casSlotNull(ForkJoinTask<?>[] q, int i,
434     ForkJoinTask<?> t) {
435 jsr166 1.10 return UNSAFE.compareAndSwapObject(q, slotOffset(i), t, null);
436 jsr166 1.1 }
437    
438     /**
439     * Sets sp in store-order.
440     */
441     private void storeSp(int s) {
442     UNSAFE.putOrderedInt(this, spOffset, s);
443     }
444    
445     // Main queue methods
446    
447     /**
448     * Pushes a task. Called only by current thread.
449     *
450     * @param t the task. Caller must ensure non-null.
451     */
452     final void pushTask(ForkJoinTask<?> t) {
453     ForkJoinTask<?>[] q = queue;
454     int mask = q.length - 1;
455     int s = sp;
456     setSlot(q, s & mask, t);
457     storeSp(++s);
458     if ((s -= base) == 1)
459     pool.signalWork();
460     else if (s >= mask)
461     growQueue();
462     }
463    
464     /**
465     * Tries to take a task from the base of the queue, failing if
466     * either empty or contended.
467     *
468     * @return a task, or null if none or contended
469     */
470     final ForkJoinTask<?> deqTask() {
471     ForkJoinTask<?> t;
472     ForkJoinTask<?>[] q;
473     int i;
474     int b;
475     if (sp != (b = base) &&
476     (q = queue) != null && // must read q after b
477     (t = q[i = (q.length - 1) & b]) != null &&
478     casSlotNull(q, i, t)) {
479     base = b + 1;
480     return t;
481     }
482     return null;
483     }
484    
485     /**
486 jsr166 1.6 * Tries to take a task from the base of own queue, activating if
487     * necessary, failing only if empty. Called only by current thread.
488     *
489     * @return a task, or null if none
490     */
491     final ForkJoinTask<?> locallyDeqTask() {
492     int b;
493     while (sp != (b = base)) {
494     if (tryActivate()) {
495     ForkJoinTask<?>[] q = queue;
496     int i = (q.length - 1) & b;
497     ForkJoinTask<?> t = q[i];
498     if (t != null && casSlotNull(q, i, t)) {
499     base = b + 1;
500     return t;
501     }
502     }
503     }
504     return null;
505     }
506    
507     /**
508 jsr166 1.1 * Returns a popped task, or null if empty. Ensures active status
509     * if non-null. Called only by current thread.
510     */
511     final ForkJoinTask<?> popTask() {
512     int s = sp;
513     while (s != base) {
514     if (tryActivate()) {
515     ForkJoinTask<?>[] q = queue;
516     int mask = q.length - 1;
517     int i = (s - 1) & mask;
518     ForkJoinTask<?> t = q[i];
519     if (t == null || !casSlotNull(q, i, t))
520     break;
521     storeSp(s - 1);
522     return t;
523     }
524     }
525     return null;
526     }
527    
528     /**
529     * Specialized version of popTask to pop only if
530     * topmost element is the given task. Called only
531     * by current thread while active.
532     *
533     * @param t the task. Caller must ensure non-null.
534     */
535     final boolean unpushTask(ForkJoinTask<?> t) {
536     ForkJoinTask<?>[] q = queue;
537     int mask = q.length - 1;
538     int s = sp - 1;
539     if (casSlotNull(q, s & mask, t)) {
540     storeSp(s);
541     return true;
542     }
543     return false;
544     }
545    
546     /**
547 jsr166 1.6 * Returns next task or null if empty or contended
548 jsr166 1.1 */
549     final ForkJoinTask<?> peekTask() {
550     ForkJoinTask<?>[] q = queue;
551     if (q == null)
552     return null;
553     int mask = q.length - 1;
554     int i = locallyFifo ? base : (sp - 1);
555     return q[i & mask];
556     }
557    
558     /**
559     * Doubles queue array size. Transfers elements by emulating
560     * steals (deqs) from old array and placing, oldest first, into
561     * new array.
562     */
563     private void growQueue() {
564     ForkJoinTask<?>[] oldQ = queue;
565     int oldSize = oldQ.length;
566     int newSize = oldSize << 1;
567     if (newSize > MAXIMUM_QUEUE_CAPACITY)
568     throw new RejectedExecutionException("Queue capacity exceeded");
569     ForkJoinTask<?>[] newQ = queue = new ForkJoinTask<?>[newSize];
570    
571     int b = base;
572     int bf = b + oldSize;
573     int oldMask = oldSize - 1;
574     int newMask = newSize - 1;
575     do {
576     int oldIndex = b & oldMask;
577     ForkJoinTask<?> t = oldQ[oldIndex];
578     if (t != null && !casSlotNull(oldQ, oldIndex, t))
579     t = null;
580     setSlot(newQ, b & newMask, t);
581     } while (++b != bf);
582     pool.signalWork();
583     }
584    
585     /**
586     * Tries to steal a task from another worker. Starts at a random
587     * index of workers array, and probes workers until finding one
588     * with non-empty queue or finding that all are empty. It
589     * randomly selects the first n probes. If these are empty, it
590     * resorts to a full circular traversal, which is necessary to
591     * accurately set active status by caller. Also restarts if pool
592     * events occurred since last scan, which forces refresh of
593     * workers array, in case barrier was associated with resize.
594     *
595     * This method must be both fast and quiet -- usually avoiding
596     * memory accesses that could disrupt cache sharing etc other than
597     * those needed to check for and take tasks. This accounts for,
598     * among other things, updating random seed in place without
599     * storing it until exit.
600     *
601     * @return a task, or null if none found
602     */
603     private ForkJoinTask<?> scan() {
604     ForkJoinTask<?> t = null;
605     int r = seed; // extract once to keep scan quiet
606     ForkJoinWorkerThread[] ws; // refreshed on outer loop
607     int mask; // must be power 2 minus 1 and > 0
608     outer:do {
609     if ((ws = pool.workers) != null && (mask = ws.length - 1) > 0) {
610     int idx = r;
611     int probes = ~mask; // use random index while negative
612     for (;;) {
613     r = xorShift(r); // update random seed
614     ForkJoinWorkerThread v = ws[mask & idx];
615     if (v == null || v.sp == v.base) {
616     if (probes <= mask)
617     idx = (probes++ < 0) ? r : (idx + 1);
618     else
619     break;
620     }
621     else if (!tryActivate() || (t = v.deqTask()) == null)
622     continue outer; // restart on contention
623     else
624     break outer;
625     }
626     }
627     } while (pool.hasNewSyncEvent(this)); // retry on pool events
628     seed = r;
629     return t;
630     }
631    
632     /**
633     * Gets and removes a local or stolen task.
634     *
635     * @return a task, if available
636     */
637     final ForkJoinTask<?> pollTask() {
638 jsr166 1.6 ForkJoinTask<?> t = locallyFifo ? locallyDeqTask() : popTask();
639 jsr166 1.1 if (t == null && (t = scan()) != null)
640     ++stealCount;
641     return t;
642     }
643    
644     /**
645     * Gets a local task.
646     *
647     * @return a task, if available
648     */
649     final ForkJoinTask<?> pollLocalTask() {
650 jsr166 1.6 return locallyFifo ? locallyDeqTask() : popTask();
651 jsr166 1.1 }
652    
653     /**
654     * Returns a pool submission, if one exists, activating first.
655     *
656     * @return a submission, if available
657     */
658     private ForkJoinTask<?> pollSubmission() {
659     ForkJoinPool p = pool;
660     while (p.hasQueuedSubmissions()) {
661     ForkJoinTask<?> t;
662     if (tryActivate() && (t = p.pollSubmission()) != null)
663     return t;
664     }
665     return null;
666     }
667    
668     // Methods accessed only by Pool
669    
670     /**
671     * Removes and cancels all tasks in queue. Can be called from any
672     * thread.
673     */
674     final void cancelTasks() {
675     ForkJoinTask<?> t;
676     while (base != sp && (t = deqTask()) != null)
677     t.cancelIgnoringExceptions();
678     }
679    
680     /**
681     * Drains tasks to given collection c.
682     *
683     * @return the number of tasks drained
684     */
685 jsr166 1.5 final int drainTasksTo(Collection<? super ForkJoinTask<?>> c) {
686 jsr166 1.1 int n = 0;
687     ForkJoinTask<?> t;
688     while (base != sp && (t = deqTask()) != null) {
689     c.add(t);
690     ++n;
691     }
692     return n;
693     }
694    
695     /**
696     * Gets and clears steal count for accumulation by pool. Called
697     * only when known to be idle (in pool.sync and termination).
698     */
699     final int getAndClearStealCount() {
700     int sc = stealCount;
701     stealCount = 0;
702     return sc;
703     }
704    
705     /**
706 jsr166 1.4 * Returns {@code true} if at least one worker in the given array
707     * appears to have at least one queued task.
708 jsr166 1.1 *
709     * @param ws array of workers
710     */
711     static boolean hasQueuedTasks(ForkJoinWorkerThread[] ws) {
712     if (ws != null) {
713     int len = ws.length;
714     for (int j = 0; j < 2; ++j) { // need two passes for clean sweep
715     for (int i = 0; i < len; ++i) {
716     ForkJoinWorkerThread w = ws[i];
717     if (w != null && w.sp != w.base)
718     return true;
719     }
720     }
721     }
722     return false;
723     }
724    
725     // Support methods for ForkJoinTask
726    
727     /**
728     * Returns an estimate of the number of tasks in the queue.
729     */
730     final int getQueueSize() {
731     // suppress momentarily negative values
732     return Math.max(0, sp - base);
733     }
734    
735     /**
736     * Returns an estimate of the number of tasks, offset by a
737     * function of number of idle workers.
738     */
739     final int getEstimatedSurplusTaskCount() {
740 dl 1.11 /*
741     * The goal here is to provide a very cheap heuristic guide
742     * for task partitioning when programmers, frameworks, tools,
743     * or languages have little or no idea about task granularity.
744     * In essence by offering this method, we ask users only about
745     * tradeoffs in overhead vs expected throughput and its
746     * variance, rather than how finely to partition tasks.
747     *
748     * In a steady state strict (tree-structured) computation,
749     * each thread makes available for stealing enough tasks for
750     * other threads to remain active. Inductively, if all threads
751     * play by the same rules, each thread should make available
752 jsr166 1.13 * only a constant number of tasks.
753 dl 1.11 *
754     * The minimum useful constant is just 1. But using a value of
755     * 1 would require immediate replenishment upon each steal to
756     * maintain enough tasks, which is infeasible. Further,
757     * partitionings/granularities of offered tasks should
758     * minimize steal rates, which in general means that threads
759     * nearer the top of computation tree should generate more
760     * than those nearer the bottom. In perfect steady state, each
761     * thread is at approximately the same level of computation
762     * tree. However, producing extra tasks amortizes the
763     * uncertainty of progress and diffusion assumptions.
764     *
765     * So, users will want to use values larger, but not much
766     * larger than 1 to both smooth over transient shortages and
767     * hedge against uneven progress; as traded off against the
768     * cost of extra task overhead. We leave the user to pick a
769     * threshold value to compare with the results of this call to
770 jsr166 1.13 * guide decisions, but recommend values such as 3.
771 dl 1.11 *
772     * When all threads are active, it is on average OK to
773     * estimate surplus strictly locally. In steady-state, if one
774     * thread is maintaining say 2 surplus tasks, then so are
775     * others. So we can just use estimated queue length (although
776     * note that (sp - base) can be an overestimate because of
777     * stealers lagging increments of base).
778     *
779     * However, this strategy alone leads to serious mis-estimates
780     * in some non-steady-state conditions (ramp-up, ramp-down,
781     * other stalls). We can detect many of these by further
782     * considering the number of "idle" threads, that are known to
783     * have zero queued tasks. A straight compensation would lead
784     * to weighting of the queued task estimate by a function of
785     * the proportion of idle threads. However, we don't want to
786     * waste much calculation for the sake of weightings that only
787     * apply transiently, so cheapen this by (a) not bothering to
788     * weight at all unless there is more than one queued task (b)
789     * rather than compensating by a factor of (#idle/#active)
790 jsr166 1.12 * threads, we just subtract out a function of #idle that is
791 dl 1.11 * a good enough approximation for conditions near the
792     * borderlines for threshold testing. This errs in the
793     * direction of reporting more extreme lack of surplus (as in
794     * returning negative values) in cases where users should
795     * almost surely be generating tasks anyway.
796     */
797     int n = sp - base;
798     return n > 1? n - (pool.getIdleThreadCount() >>> 2) : n;
799 jsr166 1.1 }
800    
801     /**
802     * Scans, returning early if joinMe done.
803     */
804     final ForkJoinTask<?> scanWhileJoining(ForkJoinTask<?> joinMe) {
805     ForkJoinTask<?> t = pollTask();
806     if (t != null && joinMe.status < 0 && sp == base) {
807     pushTask(t); // unsteal if done and this task would be stealable
808     t = null;
809     }
810     return t;
811     }
812    
813     /**
814     * Runs tasks until {@code pool.isQuiescent()}.
815     */
816     final void helpQuiescePool() {
817     for (;;) {
818     ForkJoinTask<?> t = pollTask();
819     if (t != null)
820     t.quietlyExec();
821     else if (tryInactivate() && pool.isQuiescent())
822     break;
823     }
824     do {} while (!tryActivate()); // re-activate on exit
825     }
826    
827     // Unsafe mechanics
828    
/** Handle for the low-level ordered-store and CAS operations. */
private static final sun.misc.Unsafe UNSAFE = sun.misc.Unsafe.getUnsafe();

/** Raw offset of the {@code sp} field, used by storeSp. */
private static final long spOffset =
    objectFieldOffset("sp", ForkJoinWorkerThread.class);

/** Raw offset of the {@code runState} field, used for CAS transitions. */
private static final long runStateOffset =
    objectFieldOffset("runState", ForkJoinWorkerThread.class);

/** Offset of element 0 within a ForkJoinTask[] array. */
private static final long qBase;

/** log2 of the element size of a ForkJoinTask[] array. */
private static final int qShift;

static {
    qBase = UNSAFE.arrayBaseOffset(ForkJoinTask[].class);
    int scale = UNSAFE.arrayIndexScale(ForkJoinTask[].class);
    // Slot offsets are computed with a shift, so the scale has to be
    // a power of two.
    if ((scale & (scale - 1)) != 0) {
        throw new Error("data type scale not a power of two");
    }
    qShift = 31 - Integer.numberOfLeadingZeros(scale);
}
844 jsr166 1.3
845     private static long objectFieldOffset(String field, Class<?> klazz) {
846     try {
847     return UNSAFE.objectFieldOffset(klazz.getDeclaredField(field));
848     } catch (NoSuchFieldException e) {
849     // Convert Exception to corresponding Error
850     NoSuchFieldError error = new NoSuchFieldError(field);
851     error.initCause(e);
852     throw error;
853     }
854     }
855 jsr166 1.1 }