5 |
|
*/ |
6 |
|
|
7 |
|
package jsr166y; |
8 |
< |
import java.util.*; |
8 |
> |
|
9 |
|
import java.util.concurrent.*; |
10 |
< |
import java.util.concurrent.atomic.*; |
11 |
< |
import java.util.concurrent.locks.*; |
12 |
< |
import sun.misc.Unsafe; |
13 |
< |
import java.lang.reflect.*; |
10 |
> |
|
11 |
> |
import java.util.Collection; |
12 |
|
|
13 |
|
/** |
14 |
|
* A thread managed by a {@link ForkJoinPool}. This class is |
15 |
|
* subclassable solely for the sake of adding functionality -- there |
16 |
< |
* are no overridable methods dealing with scheduling or |
17 |
< |
* execution. However, you can override initialization and termination |
18 |
< |
* methods surrounding the main task processing loop. If you do |
19 |
< |
* create such a subclass, you will also need to supply a custom |
20 |
< |
* ForkJoinWorkerThreadFactory to use it in a ForkJoinPool. |
16 |
> |
* are no overridable methods dealing with scheduling or execution. |
17 |
> |
* However, you can override initialization and termination methods |
18 |
> |
* surrounding the main task processing loop. If you do create such a |
19 |
> |
* subclass, you will also need to supply a custom {@link |
20 |
> |
* ForkJoinPool.ForkJoinWorkerThreadFactory} to use it in a {@code |
21 |
> |
* ForkJoinPool}. |
22 |
|
* |
23 |
|
* @since 1.7 |
24 |
|
* @author Doug Lea |
57 |
|
* considered individually, is not wait-free. One thief cannot |
58 |
|
* successfully continue until another in-progress one (or, if |
59 |
|
* previously empty, a push) completes. However, in the |
60 |
< |
* aggregate, we ensure at least probabilistic non-blockingness. If |
61 |
< |
* an attempted steal fails, a thief always chooses a different |
62 |
< |
* random victim target to try next. So, in order for one thief to |
63 |
< |
* progress, it suffices for any in-progress deq or new push on |
64 |
< |
* any empty queue to complete. One reason this works well here is |
65 |
< |
* that apparently-nonempty often means soon-to-be-stealable, |
66 |
< |
* which gives threads a chance to activate if necessary before |
67 |
< |
* stealing (see below). |
60 |
> |
* aggregate, we ensure at least probabilistic |
61 |
> |
* non-blockingness. If an attempted steal fails, a thief always |
62 |
> |
* chooses a different random victim target to try next. So, in |
63 |
> |
* order for one thief to progress, it suffices for any |
64 |
> |
* in-progress deq or new push on any empty queue to complete. One |
65 |
> |
* reason this works well here is that apparently-nonempty often |
66 |
> |
* means soon-to-be-stealable, which gives threads a chance to |
67 |
> |
* activate if necessary before stealing (see below). |
68 |
> |
* |
69 |
> |
* This approach also enables support for "async mode" where local |
70 |
> |
* task processing is in FIFO, not LIFO order, simply by using a |
71 |
> |
* version of deq rather than pop when locallyFifo is true (as set |
72 |
> |
* by the ForkJoinPool). This allows use in message-passing |
73 |
> |
* frameworks in which tasks are never joined. |
74 |
|
* |
75 |
|
* Efficient implementation of this approach currently relies on |
76 |
|
* an uncomfortable amount of "Unsafe" mechanics. To maintain |
80 |
|
* protected by volatile base reads, reads of the queue array and |
81 |
|
* its slots do not need volatile load semantics, but writes (in |
82 |
|
* push) require store order and CASes (in pop and deq) require |
83 |
< |
* (volatile) CAS semantics. Since these combinations aren't |
84 |
< |
* supported using ordinary volatiles, the only way to accomplish |
85 |
< |
* these efficiently is to use direct Unsafe calls. (Using external |
83 |
> |
* (volatile) CAS semantics. (See "Idempotent work stealing" by |
84 |
> |
* Michael, Saraswat, and Vechev, PPoPP 2009 |
85 |
> |
* http://portal.acm.org/citation.cfm?id=1504186 for an algorithm |
86 |
> |
* with similar properties, but without support for nulling |
87 |
> |
* slots.) Since these combinations aren't supported using |
88 |
> |
* ordinary volatiles, the only way to accomplish these |
89 |
> |
* efficiently is to use direct Unsafe calls. (Using external |
90 |
|
* AtomicIntegers and AtomicReferenceArrays for the indices and |
91 |
|
* array is significantly slower because of memory locality and |
92 |
< |
* indirection effects.) Further, performance on most platforms is |
93 |
< |
* very sensitive to placement and sizing of the (resizable) queue |
94 |
< |
* array. Even though these queues don't usually become all that |
95 |
< |
* big, the initial size must be large enough to counteract cache |
92 |
> |
* indirection effects.) |
93 |
> |
* |
94 |
> |
* Further, performance on most platforms is very sensitive to |
95 |
> |
* placement and sizing of the (resizable) queue array. Even |
96 |
> |
* though these queues don't usually become all that big, the |
97 |
> |
* initial size must be large enough to counteract cache |
98 |
|
* contention effects across multiple queues (especially in the |
99 |
|
* presence of GC cardmarking). Also, to improve thread-locality, |
100 |
|
* queues are currently initialized immediately after the thread |
111 |
|
* counter (activeCount) held by the pool. It uses an algorithm |
112 |
|
* similar to that in Herlihy and Shavit section 17.6 to cause |
113 |
|
* threads to eventually block when all threads declare they are |
114 |
< |
* inactive. (See variable "scans".) For this to work, threads |
115 |
< |
* must be declared active when executing tasks, and before |
116 |
< |
* stealing a task. They must be inactive before blocking on the |
117 |
< |
* Pool Barrier (awaiting a new submission or other Pool |
118 |
< |
* event). In between, there is some free play which we take |
119 |
< |
* advantage of to avoid contention and rapid flickering of the |
120 |
< |
* global activeCount: If inactive, we activate only if a victim |
121 |
< |
* queue appears to be nonempty (see above). Similarly, a thread |
122 |
< |
* tries to inactivate only after a full scan of other threads. |
123 |
< |
* The net effect is that contention on activeCount is rarely a |
124 |
< |
* measurable performance issue. (There are also a few other cases |
125 |
< |
* where we scan for work rather than retry/block upon |
115 |
< |
* contention.) |
114 |
> |
* inactive. For this to work, threads must be declared active |
115 |
> |
* when executing tasks, and before stealing a task. They must be |
116 |
> |
* inactive before blocking on the Pool Barrier (awaiting a new |
117 |
> |
* submission or other Pool event). In between, there is some free |
118 |
> |
* play which we take advantage of to avoid contention and rapid |
119 |
> |
* flickering of the global activeCount: If inactive, we activate |
120 |
> |
* only if a victim queue appears to be nonempty (see above). |
121 |
> |
* Similarly, a thread tries to inactivate only after a full scan |
122 |
> |
* of other threads. The net effect is that contention on |
123 |
> |
* activeCount is rarely a measurable performance issue. (There |
124 |
> |
* are also a few other cases where we scan for work rather than |
125 |
> |
* retry/block upon contention.) |
126 |
|
* |
127 |
|
* 3. Selection control. We maintain a policy of always choosing to |
128 |
|
* run local tasks rather than stealing, and always trying to |
149 |
|
private static final int MAXIMUM_QUEUE_CAPACITY = 1 << 28; |
150 |
|
|
151 |
|
/** |
152 |
< |
* The pool this thread works in. Accessed directly by ForkJoinTask |
152 |
> |
* The pool this thread works in. Accessed directly by ForkJoinTask. |
153 |
|
*/ |
154 |
|
final ForkJoinPool pool; |
155 |
|
|
177 |
|
* Activity status. When true, this worker is considered active. |
178 |
|
* Must be false upon construction. It must be true when executing |
179 |
|
* tasks, and BEFORE stealing a task. It must be false before |
180 |
< |
* calling pool.sync |
180 |
> |
* calling pool.sync. |
181 |
|
*/ |
182 |
|
private boolean active; |
183 |
|
|
200 |
|
|
201 |
|
/** |
202 |
|
* Index of this worker in pool array. Set once by pool before |
203 |
< |
* running, and accessed directly by pool during cleanup etc |
203 |
> |
* running, and accessed directly by pool during cleanup etc. |
204 |
|
*/ |
205 |
|
int poolIndex; |
206 |
|
|
277 |
|
final boolean shutdownNow() { return transitionRunStateTo(TERMINATING); } |
278 |
|
|
279 |
|
/** |
280 |
< |
* Transitions to at least the given state. Returns true if not |
281 |
< |
* already at least at given state. |
280 |
> |
* Transitions to at least the given state. |
281 |
> |
* |
282 |
> |
* @return {@code true} if not already at least at given state |
283 |
|
*/ |
284 |
|
private boolean transitionRunStateTo(int state) { |
285 |
|
for (;;) { |
304 |
|
} |
305 |
|
|
306 |
|
/** |
307 |
< |
* Tries to set status to active; fails on contention. |
307 |
> |
* Tries to set status to inactive; fails on contention. |
308 |
|
*/ |
309 |
|
private boolean tryInactivate() { |
310 |
|
if (active) { |
321 |
|
* one. Marsaglia xor-shift is cheap and works well. |
322 |
|
*/ |
323 |
|
private static int xorShift(int r) { |
324 |
< |
r ^= r << 1; |
325 |
< |
r ^= r >>> 3; |
326 |
< |
r ^= r << 10; |
316 |
< |
return r; |
324 |
> |
r ^= (r << 13); |
325 |
> |
r ^= (r >>> 17); |
326 |
> |
return r ^ (r << 5); |
327 |
|
} |
328 |
|
|
329 |
|
// Lifecycle methods |
381 |
|
/** |
382 |
|
* Performs cleanup associated with termination of this worker |
383 |
|
* thread. If you override this method, you must invoke |
384 |
< |
* super.onTermination at the end of the overridden method. |
384 |
> |
* {@code super.onTermination} at the end of the overridden method. |
385 |
|
* |
386 |
|
* @param exception the exception causing this thread to abort due |
387 |
< |
* to an unrecoverable error, or null if completed normally |
387 |
> |
* to an unrecoverable error, or {@code null} if completed normally |
388 |
|
*/ |
389 |
|
protected void onTermination(Throwable exception) { |
390 |
|
// Execute remaining local tasks unless aborting or terminating |
391 |
< |
while (exception == null && !pool.isTerminating() && base != sp) { |
391 |
> |
while (exception == null && pool.isProcessingTasks() && base != sp) { |
392 |
|
try { |
393 |
|
ForkJoinTask<?> t = popTask(); |
394 |
|
if (t != null) |
395 |
|
t.quietlyExec(); |
396 |
< |
} catch(Throwable ex) { |
396 |
> |
} catch (Throwable ex) { |
397 |
|
exception = ex; |
398 |
|
} |
399 |
|
} |
400 |
|
// Cancel other tasks, transition status, notify pool, and |
401 |
|
// propagate exception to uncaught exception handler |
402 |
|
try { |
403 |
< |
do;while (!tryInactivate()); // ensure inactive |
403 |
> |
do {} while (!tryInactivate()); // ensure inactive |
404 |
|
cancelTasks(); |
405 |
|
runState = TERMINATED; |
406 |
|
pool.workerTerminated(this); |
415 |
|
|
416 |
|
// Intrinsics-based support for queue operations. |
417 |
|
|
418 |
+ |
private static long slotOffset(int i) { |
419 |
+ |
return ((long) i << qShift) + qBase; |
420 |
+ |
} |
421 |
+ |
|
422 |
|
/** |
423 |
|
* Sets, in store-order, the given slot of q to the given task. |
424 |
|
* Caller must ensure q is non-null and index is in range. |
425 |
|
*/ |
426 |
|
private static void setSlot(ForkJoinTask<?>[] q, int i, |
427 |
< |
ForkJoinTask<?> t){ |
428 |
< |
UNSAFE.putOrderedObject(q, (i << qShift) + qBase, t); |
427 |
> |
ForkJoinTask<?> t) { |
428 |
> |
UNSAFE.putOrderedObject(q, slotOffset(i), t); |
429 |
|
} |
430 |
|
|
431 |
|
/** |
434 |
|
*/ |
435 |
|
private static boolean casSlotNull(ForkJoinTask<?>[] q, int i, |
436 |
|
ForkJoinTask<?> t) { |
437 |
< |
return UNSAFE.compareAndSwapObject(q, (i << qShift) + qBase, t, null); |
437 |
> |
return UNSAFE.compareAndSwapObject(q, slotOffset(i), t, null); |
438 |
|
} |
439 |
|
|
440 |
|
/** |
485 |
|
} |
486 |
|
|
487 |
|
/** |
488 |
+ |
* Tries to take a task from the base of own queue, activating if |
489 |
+ |
* necessary, failing only if empty. Called only by current thread. |
490 |
+ |
* |
491 |
+ |
* @return a task, or null if none |
492 |
+ |
*/ |
493 |
+ |
final ForkJoinTask<?> locallyDeqTask() { |
494 |
+ |
int b; |
495 |
+ |
while (sp != (b = base)) { |
496 |
+ |
if (tryActivate()) { |
497 |
+ |
ForkJoinTask<?>[] q = queue; |
498 |
+ |
int i = (q.length - 1) & b; |
499 |
+ |
ForkJoinTask<?> t = q[i]; |
500 |
+ |
if (t != null && casSlotNull(q, i, t)) { |
501 |
+ |
base = b + 1; |
502 |
+ |
return t; |
503 |
+ |
} |
504 |
+ |
} |
505 |
+ |
} |
506 |
+ |
return null; |
507 |
+ |
} |
508 |
+ |
|
509 |
+ |
/** |
510 |
|
* Returns a popped task, or null if empty. Ensures active status |
511 |
|
* if non-null. Called only by current thread. |
512 |
|
*/ |
546 |
|
} |
547 |
|
|
548 |
|
/** |
549 |
< |
* Returns next task. |
549 |
> |
* Returns next task, or null if empty or contended. |
550 |
|
*/ |
551 |
|
final ForkJoinTask<?> peekTask() { |
552 |
|
ForkJoinTask<?>[] q = queue; |
553 |
|
if (q == null) |
554 |
|
return null; |
555 |
|
int mask = q.length - 1; |
556 |
< |
int i = locallyFifo? base : (sp - 1); |
556 |
> |
int i = locallyFifo ? base : (sp - 1); |
557 |
|
return q[i & mask]; |
558 |
|
} |
559 |
|
|
616 |
|
ForkJoinWorkerThread v = ws[mask & idx]; |
617 |
|
if (v == null || v.sp == v.base) { |
618 |
|
if (probes <= mask) |
619 |
< |
idx = (probes++ < 0)? r : (idx + 1); |
619 |
> |
idx = (probes++ < 0) ? r : (idx + 1); |
620 |
|
else |
621 |
|
break; |
622 |
|
} |
637 |
|
* @return a task, if available |
638 |
|
*/ |
639 |
|
final ForkJoinTask<?> pollTask() { |
640 |
< |
ForkJoinTask<?> t = locallyFifo? deqTask() : popTask(); |
640 |
> |
ForkJoinTask<?> t = locallyFifo ? locallyDeqTask() : popTask(); |
641 |
|
if (t == null && (t = scan()) != null) |
642 |
|
++stealCount; |
643 |
|
return t; |
649 |
|
* @return a task, if available |
650 |
|
*/ |
651 |
|
final ForkJoinTask<?> pollLocalTask() { |
652 |
< |
return locallyFifo? deqTask() : popTask(); |
652 |
> |
return locallyFifo ? locallyDeqTask() : popTask(); |
653 |
|
} |
654 |
|
|
655 |
|
/** |
684 |
|
* |
685 |
|
* @return the number of tasks drained |
686 |
|
*/ |
687 |
< |
final int drainTasksTo(Collection<ForkJoinTask<?>> c) { |
687 |
> |
final int drainTasksTo(Collection<? super ForkJoinTask<?>> c) { |
688 |
|
int n = 0; |
689 |
|
ForkJoinTask<?> t; |
690 |
|
while (base != sp && (t = deqTask()) != null) { |
705 |
|
} |
706 |
|
|
707 |
|
/** |
708 |
< |
* Returns true if at least one worker in the given array appears |
709 |
< |
* to have at least one queued task. |
708 |
> |
* Returns {@code true} if at least one worker in the given array |
709 |
> |
* appears to have at least one queued task. |
710 |
> |
* |
711 |
|
* @param ws array of workers |
712 |
|
*/ |
713 |
|
static boolean hasQueuedTasks(ForkJoinWorkerThread[] ws) { |
730 |
|
* Returns an estimate of the number of tasks in the queue. |
731 |
|
*/ |
732 |
|
final int getQueueSize() { |
733 |
< |
int n = sp - base; |
734 |
< |
return n < 0? 0 : n; // suppress momentarily negative values |
733 |
> |
// suppress momentarily negative values |
734 |
> |
return Math.max(0, sp - base); |
735 |
|
} |
736 |
|
|
737 |
|
/** |
744 |
|
} |
745 |
|
|
746 |
|
/** |
747 |
< |
* Scans, returning early if joinMe done |
747 |
> |
* Scans, returning early if joinMe done. |
748 |
|
*/ |
749 |
|
final ForkJoinTask<?> scanWhileJoining(ForkJoinTask<?> joinMe) { |
750 |
|
ForkJoinTask<?> t = pollTask(); |
756 |
|
} |
757 |
|
|
758 |
|
/** |
759 |
< |
* Runs tasks until pool isQuiescent. |
759 |
> |
* Runs tasks until {@code pool.isQuiescent()}. |
760 |
|
*/ |
761 |
|
final void helpQuiescePool() { |
762 |
|
for (;;) { |
766 |
|
else if (tryInactivate() && pool.isQuiescent()) |
767 |
|
break; |
768 |
|
} |
769 |
< |
do;while (!tryActivate()); // re-activate on exit |
769 |
> |
do {} while (!tryActivate()); // re-activate on exit |
770 |
> |
} |
771 |
> |
|
772 |
> |
// Unsafe mechanics |
773 |
> |
|
774 |
> |
private static final sun.misc.Unsafe UNSAFE = getUnsafe(); |
775 |
> |
private static final long spOffset = |
776 |
> |
objectFieldOffset("sp", ForkJoinWorkerThread.class); |
777 |
> |
private static final long runStateOffset = |
778 |
> |
objectFieldOffset("runState", ForkJoinWorkerThread.class); |
779 |
> |
private static final long qBase; |
780 |
> |
private static final int qShift; |
781 |
> |
|
782 |
> |
static { |
783 |
> |
qBase = UNSAFE.arrayBaseOffset(ForkJoinTask[].class); |
784 |
> |
int s = UNSAFE.arrayIndexScale(ForkJoinTask[].class); |
785 |
> |
if ((s & (s-1)) != 0) |
786 |
> |
throw new Error("data type scale not a power of two"); |
787 |
> |
qShift = 31 - Integer.numberOfLeadingZeros(s); |
788 |
|
} |
789 |
|
|
790 |
< |
// Temporary Unsafe mechanics for preliminary release |
736 |
< |
private static Unsafe getUnsafe() throws Throwable { |
790 |
> |
private static long objectFieldOffset(String field, Class<?> klazz) { |
791 |
|
try { |
792 |
< |
return Unsafe.getUnsafe(); |
792 |
> |
return UNSAFE.objectFieldOffset(klazz.getDeclaredField(field)); |
793 |
> |
} catch (NoSuchFieldException e) { |
794 |
> |
// Convert Exception to corresponding Error |
795 |
> |
NoSuchFieldError error = new NoSuchFieldError(field); |
796 |
> |
error.initCause(e); |
797 |
> |
throw error; |
798 |
> |
} |
799 |
> |
} |
800 |
> |
|
801 |
> |
/** |
802 |
> |
* Returns a sun.misc.Unsafe. Suitable for use in a 3rd party package. |
803 |
> |
* Replace with a simple call to Unsafe.getUnsafe when integrating |
804 |
> |
* into a JDK. |
805 |
> |
* |
806 |
> |
* @return a sun.misc.Unsafe |
807 |
> |
*/ |
808 |
> |
private static sun.misc.Unsafe getUnsafe() { |
809 |
> |
try { |
810 |
> |
return sun.misc.Unsafe.getUnsafe(); |
811 |
|
} catch (SecurityException se) { |
812 |
|
try { |
813 |
|
return java.security.AccessController.doPrivileged |
814 |
< |
(new java.security.PrivilegedExceptionAction<Unsafe>() { |
815 |
< |
public Unsafe run() throws Exception { |
816 |
< |
return getUnsafePrivileged(); |
814 |
> |
(new java.security |
815 |
> |
.PrivilegedExceptionAction<sun.misc.Unsafe>() { |
816 |
> |
public sun.misc.Unsafe run() throws Exception { |
817 |
> |
java.lang.reflect.Field f = sun.misc |
818 |
> |
.Unsafe.class.getDeclaredField("theUnsafe"); |
819 |
> |
f.setAccessible(true); |
820 |
> |
return (sun.misc.Unsafe) f.get(null); |
821 |
|
}}); |
822 |
|
} catch (java.security.PrivilegedActionException e) { |
823 |
< |
throw e.getCause(); |
823 |
> |
throw new RuntimeException("Could not initialize intrinsics", |
824 |
> |
e.getCause()); |
825 |
|
} |
826 |
|
} |
827 |
|
} |
751 |
– |
|
752 |
– |
private static Unsafe getUnsafePrivileged() |
753 |
– |
throws NoSuchFieldException, IllegalAccessException { |
754 |
– |
Field f = Unsafe.class.getDeclaredField("theUnsafe"); |
755 |
– |
f.setAccessible(true); |
756 |
– |
return (Unsafe) f.get(null); |
757 |
– |
} |
758 |
– |
|
759 |
– |
private static long fieldOffset(String fieldName) |
760 |
– |
throws NoSuchFieldException { |
761 |
– |
return UNSAFE.objectFieldOffset |
762 |
– |
(ForkJoinWorkerThread.class.getDeclaredField(fieldName)); |
763 |
– |
} |
764 |
– |
|
765 |
– |
static final Unsafe UNSAFE; |
766 |
– |
static final long baseOffset; |
767 |
– |
static final long spOffset; |
768 |
– |
static final long runStateOffset; |
769 |
– |
static final long qBase; |
770 |
– |
static final int qShift; |
771 |
– |
static { |
772 |
– |
try { |
773 |
– |
UNSAFE = getUnsafe(); |
774 |
– |
baseOffset = fieldOffset("base"); |
775 |
– |
spOffset = fieldOffset("sp"); |
776 |
– |
runStateOffset = fieldOffset("runState"); |
777 |
– |
qBase = UNSAFE.arrayBaseOffset(ForkJoinTask[].class); |
778 |
– |
int s = UNSAFE.arrayIndexScale(ForkJoinTask[].class); |
779 |
– |
if ((s & (s-1)) != 0) |
780 |
– |
throw new Error("data type scale not a power of two"); |
781 |
– |
qShift = 31 - Integer.numberOfLeadingZeros(s); |
782 |
– |
} catch (Throwable e) { |
783 |
– |
throw new RuntimeException("Could not initialize intrinsics", e); |
784 |
– |
} |
785 |
– |
} |
828 |
|
} |