5 |
|
*/ |
6 |
|
|
7 |
|
package jsr166y; |
8 |
< |
import java.util.*; |
8 |
> |
|
9 |
|
import java.util.concurrent.*; |
10 |
< |
import java.util.concurrent.atomic.*; |
11 |
< |
import java.util.concurrent.locks.*; |
12 |
< |
import sun.misc.Unsafe; |
13 |
< |
import java.lang.reflect.*; |
10 |
> |
|
11 |
> |
import java.util.Collection; |
12 |
|
|
13 |
|
/** |
14 |
|
* A thread managed by a {@link ForkJoinPool}. This class is |
15 |
|
* subclassable solely for the sake of adding functionality -- there |
16 |
< |
* are no overridable methods dealing with scheduling or |
17 |
< |
* execution. However, you can override initialization and termination |
18 |
< |
* methods surrounding the main task processing loop. If you do |
19 |
< |
* create such a subclass, you will also need to supply a custom |
20 |
< |
* ForkJoinWorkerThreadFactory to use it in a ForkJoinPool. |
16 |
> |
* are no overridable methods dealing with scheduling or execution. |
17 |
> |
* However, you can override initialization and termination methods |
18 |
> |
* surrounding the main task processing loop. If you do create such a |
19 |
> |
* subclass, you will also need to supply a custom {@link |
20 |
> |
* ForkJoinPool.ForkJoinWorkerThreadFactory} to use it in a {@code |
21 |
> |
* ForkJoinPool}. |
22 |
|
* |
23 |
|
* @since 1.7 |
24 |
|
* @author Doug Lea |
57 |
|
* considered individually, is not wait-free. One thief cannot |
58 |
|
* successfully continue until another in-progress one (or, if |
59 |
|
* previously empty, a push) completes. However, in the |
60 |
< |
* aggregate, we ensure at least probabilistic non-blockingness. If |
61 |
< |
* an attempted steal fails, a thief always chooses a different |
62 |
< |
* random victim target to try next. So, in order for one thief to |
63 |
< |
* progress, it suffices for any in-progress deq or new push on |
64 |
< |
* any empty queue to complete. One reason this works well here is |
65 |
< |
* that apparently-nonempty often means soon-to-be-stealable, |
66 |
< |
* which gives threads a chance to activate if necessary before |
67 |
< |
* stealing (see below). |
60 |
> |
* aggregate, we ensure at least probabilistic |
61 |
> |
* non-blockingness. If an attempted steal fails, a thief always |
62 |
> |
* chooses a different random victim target to try next. So, in |
63 |
> |
* order for one thief to progress, it suffices for any |
64 |
> |
* in-progress deq or new push on any empty queue to complete. One |
65 |
> |
* reason this works well here is that apparently-nonempty often |
66 |
> |
* means soon-to-be-stealable, which gives threads a chance to |
67 |
> |
* activate if necessary before stealing (see below). |
68 |
> |
* |
69 |
> |
* This approach also enables support for "async mode" where local |
70 |
> |
* task processing is in FIFO, not LIFO order, simply by using a |
71 |
> |
* version of deq rather than pop when locallyFifo is true (as set |
72 |
> |
* by the ForkJoinPool). This allows use in message-passing |
73 |
> |
* frameworks in which tasks are never joined. |
74 |
|
* |
75 |
|
* Efficient implementation of this approach currently relies on |
76 |
|
* an uncomfortable amount of "Unsafe" mechanics. To maintain |
80 |
|
* protected by volatile base reads, reads of the queue array and |
81 |
|
* its slots do not need volatile load semantics, but writes (in |
82 |
|
* push) require store order and CASes (in pop and deq) require |
83 |
< |
* (volatile) CAS semantics. Since these combinations aren't |
84 |
< |
* supported using ordinary volatiles, the only way to accomplish |
85 |
< |
* these efficiently is to use direct Unsafe calls. (Using external |
83 |
> |
* (volatile) CAS semantics. (See "Idempotent work stealing" by |
84 |
> |
* Michael, Saraswat, and Vechev, PPoPP 2009 |
85 |
> |
* http://portal.acm.org/citation.cfm?id=1504186 for an algorithm |
86 |
> |
* with similar properties, but without support for nulling |
87 |
> |
* slots.) Since these combinations aren't supported using |
88 |
> |
* ordinary volatiles, the only way to accomplish these |
89 |
> |
* efficiently is to use direct Unsafe calls. (Using external |
90 |
|
* AtomicIntegers and AtomicReferenceArrays for the indices and |
91 |
|
* array is significantly slower because of memory locality and |
92 |
< |
* indirection effects.) Further, performance on most platforms is |
93 |
< |
* very sensitive to placement and sizing of the (resizable) queue |
94 |
< |
* array. Even though these queues don't usually become all that |
95 |
< |
* big, the initial size must be large enough to counteract cache |
92 |
> |
* indirection effects.) |
93 |
> |
* |
94 |
> |
* Further, performance on most platforms is very sensitive to |
95 |
> |
* placement and sizing of the (resizable) queue array. Even |
96 |
> |
* though these queues don't usually become all that big, the |
97 |
> |
* initial size must be large enough to counteract cache |
98 |
|
* contention effects across multiple queues (especially in the |
99 |
|
* presence of GC cardmarking). Also, to improve thread-locality, |
100 |
|
* queues are currently initialized immediately after the thread |
111 |
|
* counter (activeCount) held by the pool. It uses an algorithm |
112 |
|
* similar to that in Herlihy and Shavit section 17.6 to cause |
113 |
|
* threads to eventually block when all threads declare they are |
114 |
< |
* inactive. (See variable "scans".) For this to work, threads |
115 |
< |
* must be declared active when executing tasks, and before |
116 |
< |
* stealing a task. They must be inactive before blocking on the |
117 |
< |
* Pool Barrier (awaiting a new submission or other Pool |
118 |
< |
* event). In between, there is some free play which we take |
119 |
< |
* advantage of to avoid contention and rapid flickering of the |
120 |
< |
* global activeCount: If inactive, we activate only if a victim |
121 |
< |
* queue appears to be nonempty (see above). Similarly, a thread |
122 |
< |
* tries to inactivate only after a full scan of other threads. |
123 |
< |
* The net effect is that contention on activeCount is rarely a |
124 |
< |
* measurable performance issue. (There are also a few other cases |
125 |
< |
* where we scan for work rather than retry/block upon |
115 |
< |
* contention.) |
114 |
> |
* inactive. For this to work, threads must be declared active |
115 |
> |
* when executing tasks, and before stealing a task. They must be |
116 |
> |
* inactive before blocking on the Pool Barrier (awaiting a new |
117 |
> |
* submission or other Pool event). In between, there is some free |
118 |
> |
* play which we take advantage of to avoid contention and rapid |
119 |
> |
* flickering of the global activeCount: If inactive, we activate |
120 |
> |
* only if a victim queue appears to be nonempty (see above). |
121 |
> |
* Similarly, a thread tries to inactivate only after a full scan |
122 |
> |
* of other threads. The net effect is that contention on |
123 |
> |
* activeCount is rarely a measurable performance issue. (There |
124 |
> |
* are also a few other cases where we scan for work rather than |
125 |
> |
* retry/block upon contention.) |
126 |
|
* |
127 |
|
* 3. Selection control. We maintain a policy of always choosing to |
128 |
|
* run local tasks rather than stealing, and always trying to |
277 |
|
final boolean shutdownNow() { return transitionRunStateTo(TERMINATING); } |
278 |
|
|
279 |
|
/** |
280 |
< |
* Transitions to at least the given state. Returns true if not |
281 |
< |
* already at least at given state. |
280 |
> |
* Transitions to at least the given state. |
281 |
> |
* |
282 |
> |
* @return {@code true} if not already at least at the given state |
283 |
|
*/ |
284 |
|
private boolean transitionRunStateTo(int state) { |
285 |
|
for (;;) { |
321 |
|
* one. Marsaglia xor-shift is cheap and works well. |
322 |
|
*/ |
323 |
|
private static int xorShift(int r) { |
324 |
< |
r ^= r << 1; |
325 |
< |
r ^= r >>> 3; |
326 |
< |
r ^= r << 10; |
316 |
< |
return r; |
324 |
> |
r ^= (r << 13); |
325 |
> |
r ^= (r >>> 17); |
326 |
> |
return r ^ (r << 5); |
327 |
|
} |
328 |
|
|
329 |
|
// Lifecycle methods |
384 |
|
* {@code super.onTermination} at the end of the overridden method. |
385 |
|
* |
386 |
|
* @param exception the exception causing this thread to abort due |
387 |
< |
* to an unrecoverable error, or null if completed normally |
387 |
> |
* to an unrecoverable error, or {@code null} if completed normally |
388 |
|
*/ |
389 |
|
protected void onTermination(Throwable exception) { |
390 |
|
// Execute remaining local tasks unless aborting or terminating |
391 |
< |
while (exception == null && !pool.isTerminating() && base != sp) { |
391 |
> |
while (exception == null && pool.isProcessingTasks() && base != sp) { |
392 |
|
try { |
393 |
|
ForkJoinTask<?> t = popTask(); |
394 |
|
if (t != null) |
481 |
|
} |
482 |
|
|
483 |
|
/** |
484 |
+ |
* Tries to take a task from the base of own queue, activating if |
485 |
+ |
* necessary, failing only if empty. Called only by current thread. |
486 |
+ |
* |
487 |
+ |
* @return a task, or null if none |
488 |
+ |
*/ |
489 |
+ |
final ForkJoinTask<?> locallyDeqTask() { |
490 |
+ |
int b; |
491 |
+ |
while (sp != (b = base)) { |
492 |
+ |
if (tryActivate()) { |
493 |
+ |
ForkJoinTask<?>[] q = queue; |
494 |
+ |
int i = (q.length - 1) & b; |
495 |
+ |
ForkJoinTask<?> t = q[i]; |
496 |
+ |
if (t != null && casSlotNull(q, i, t)) { |
497 |
+ |
base = b + 1; |
498 |
+ |
return t; |
499 |
+ |
} |
500 |
+ |
} |
501 |
+ |
} |
502 |
+ |
return null; |
503 |
+ |
} |
504 |
+ |
|
505 |
+ |
/** |
506 |
|
* Returns a popped task, or null if empty. Ensures active status |
507 |
|
* if non-null. Called only by current thread. |
508 |
|
*/ |
542 |
|
} |
543 |
|
|
544 |
|
/** |
545 |
< |
* Returns next task. |
545 |
> |
* Returns next task, or null if empty or contended. |
546 |
|
*/ |
547 |
|
final ForkJoinTask<?> peekTask() { |
548 |
|
ForkJoinTask<?>[] q = queue; |
633 |
|
* @return a task, if available |
634 |
|
*/ |
635 |
|
final ForkJoinTask<?> pollTask() { |
636 |
< |
ForkJoinTask<?> t = locallyFifo ? deqTask() : popTask(); |
636 |
> |
ForkJoinTask<?> t = locallyFifo ? locallyDeqTask() : popTask(); |
637 |
|
if (t == null && (t = scan()) != null) |
638 |
|
++stealCount; |
639 |
|
return t; |
645 |
|
* @return a task, if available |
646 |
|
*/ |
647 |
|
final ForkJoinTask<?> pollLocalTask() { |
648 |
< |
return locallyFifo ? deqTask() : popTask(); |
648 |
> |
return locallyFifo ? locallyDeqTask() : popTask(); |
649 |
|
} |
650 |
|
|
651 |
|
/** |
680 |
|
* |
681 |
|
* @return the number of tasks drained |
682 |
|
*/ |
683 |
< |
final int drainTasksTo(Collection<ForkJoinTask<?>> c) { |
683 |
> |
final int drainTasksTo(Collection<? super ForkJoinTask<?>> c) { |
684 |
|
int n = 0; |
685 |
|
ForkJoinTask<?> t; |
686 |
|
while (base != sp && (t = deqTask()) != null) { |
701 |
|
} |
702 |
|
|
703 |
|
/** |
704 |
< |
* Returns true if at least one worker in the given array appears |
705 |
< |
* to have at least one queued task. |
704 |
> |
* Returns {@code true} if at least one worker in the given array |
705 |
> |
* appears to have at least one queued task. |
706 |
|
* |
707 |
|
* @param ws array of workers |
708 |
|
*/ |
765 |
|
do {} while (!tryActivate()); // re-activate on exit |
766 |
|
} |
767 |
|
|
768 |
< |
// Temporary Unsafe mechanics for preliminary release |
769 |
< |
private static Unsafe getUnsafe() throws Throwable { |
768 |
> |
// Unsafe mechanics |
769 |
> |
|
770 |
> |
private static final sun.misc.Unsafe UNSAFE = getUnsafe(); |
771 |
> |
private static final long spOffset = |
772 |
> |
objectFieldOffset("sp", ForkJoinWorkerThread.class); |
773 |
> |
private static final long runStateOffset = |
774 |
> |
objectFieldOffset("runState", ForkJoinWorkerThread.class); |
775 |
> |
private static final long qBase; |
776 |
> |
private static final int qShift; |
777 |
> |
|
778 |
> |
static { |
779 |
> |
qBase = UNSAFE.arrayBaseOffset(ForkJoinTask[].class); |
780 |
> |
int s = UNSAFE.arrayIndexScale(ForkJoinTask[].class); |
781 |
> |
if ((s & (s-1)) != 0) |
782 |
> |
throw new Error("data type scale not a power of two"); |
783 |
> |
qShift = 31 - Integer.numberOfLeadingZeros(s); |
784 |
> |
} |
785 |
> |
|
786 |
> |
private static long objectFieldOffset(String field, Class<?> klazz) { |
787 |
> |
try { |
788 |
> |
return UNSAFE.objectFieldOffset(klazz.getDeclaredField(field)); |
789 |
> |
} catch (NoSuchFieldException e) { |
790 |
> |
// Convert Exception to corresponding Error |
791 |
> |
NoSuchFieldError error = new NoSuchFieldError(field); |
792 |
> |
error.initCause(e); |
793 |
> |
throw error; |
794 |
> |
} |
795 |
> |
} |
796 |
> |
|
797 |
> |
/** |
798 |
> |
* Returns a sun.misc.Unsafe. Suitable for use in a 3rd party package. |
799 |
> |
* Replace with a simple call to Unsafe.getUnsafe when integrating |
800 |
> |
* into a JDK. |
801 |
> |
* |
802 |
> |
* @return a sun.misc.Unsafe |
803 |
> |
*/ |
804 |
> |
private static sun.misc.Unsafe getUnsafe() { |
805 |
|
try { |
806 |
< |
return Unsafe.getUnsafe(); |
806 |
> |
return sun.misc.Unsafe.getUnsafe(); |
807 |
|
} catch (SecurityException se) { |
808 |
|
try { |
809 |
|
return java.security.AccessController.doPrivileged |
810 |
< |
(new java.security.PrivilegedExceptionAction<Unsafe>() { |
811 |
< |
public Unsafe run() throws Exception { |
812 |
< |
return getUnsafePrivileged(); |
810 |
> |
(new java.security |
811 |
> |
.PrivilegedExceptionAction<sun.misc.Unsafe>() { |
812 |
> |
public sun.misc.Unsafe run() throws Exception { |
813 |
> |
java.lang.reflect.Field f = sun.misc |
814 |
> |
.Unsafe.class.getDeclaredField("theUnsafe"); |
815 |
> |
f.setAccessible(true); |
816 |
> |
return (sun.misc.Unsafe) f.get(null); |
817 |
|
}}); |
818 |
|
} catch (java.security.PrivilegedActionException e) { |
819 |
< |
throw e.getCause(); |
819 |
> |
throw new RuntimeException("Could not initialize intrinsics", |
820 |
> |
e.getCause()); |
821 |
|
} |
822 |
|
} |
823 |
|
} |
752 |
– |
|
753 |
– |
private static Unsafe getUnsafePrivileged() |
754 |
– |
throws NoSuchFieldException, IllegalAccessException { |
755 |
– |
Field f = Unsafe.class.getDeclaredField("theUnsafe"); |
756 |
– |
f.setAccessible(true); |
757 |
– |
return (Unsafe) f.get(null); |
758 |
– |
} |
759 |
– |
|
760 |
– |
private static long fieldOffset(String fieldName) |
761 |
– |
throws NoSuchFieldException { |
762 |
– |
return UNSAFE.objectFieldOffset |
763 |
– |
(ForkJoinWorkerThread.class.getDeclaredField(fieldName)); |
764 |
– |
} |
765 |
– |
|
766 |
– |
static final Unsafe UNSAFE; |
767 |
– |
static final long baseOffset; |
768 |
– |
static final long spOffset; |
769 |
– |
static final long runStateOffset; |
770 |
– |
static final long qBase; |
771 |
– |
static final int qShift; |
772 |
– |
static { |
773 |
– |
try { |
774 |
– |
UNSAFE = getUnsafe(); |
775 |
– |
baseOffset = fieldOffset("base"); |
776 |
– |
spOffset = fieldOffset("sp"); |
777 |
– |
runStateOffset = fieldOffset("runState"); |
778 |
– |
qBase = UNSAFE.arrayBaseOffset(ForkJoinTask[].class); |
779 |
– |
int s = UNSAFE.arrayIndexScale(ForkJoinTask[].class); |
780 |
– |
if ((s & (s-1)) != 0) |
781 |
– |
throw new Error("data type scale not a power of two"); |
782 |
– |
qShift = 31 - Integer.numberOfLeadingZeros(s); |
783 |
– |
} catch (Throwable e) { |
784 |
– |
throw new RuntimeException("Could not initialize intrinsics", e); |
785 |
– |
} |
786 |
– |
} |
824 |
|
} |