13 |
|
/** |
14 |
|
* A thread managed by a {@link ForkJoinPool}. This class is |
15 |
|
* subclassable solely for the sake of adding functionality -- there |
16 |
< |
* are no overridable methods dealing with scheduling or |
17 |
< |
* execution. However, you can override initialization and termination |
18 |
< |
* methods surrounding the main task processing loop. If you do |
19 |
< |
* create such a subclass, you will also need to supply a custom |
20 |
< |
* ForkJoinWorkerThreadFactory to use it in a ForkJoinPool. |
16 |
> |
* are no overridable methods dealing with scheduling or execution. |
17 |
> |
* However, you can override initialization and termination methods |
18 |
> |
* surrounding the main task processing loop. If you do create such a |
19 |
> |
* subclass, you will also need to supply a custom {@link |
20 |
> |
* ForkJoinPool.ForkJoinWorkerThreadFactory} to use it in a {@code |
21 |
> |
* ForkJoinPool}. |
22 |
|
* |
23 |
|
* @since 1.7 |
24 |
|
* @author Doug Lea |
57 |
|
* considered individually, is not wait-free. One thief cannot |
58 |
|
* successfully continue until another in-progress one (or, if |
59 |
|
* previously empty, a push) completes. However, in the |
60 |
< |
* aggregate, we ensure at least probabilistic non-blockingness. If |
61 |
< |
* an attempted steal fails, a thief always chooses a different |
62 |
< |
* random victim target to try next. So, in order for one thief to |
63 |
< |
* progress, it suffices for any in-progress deq or new push on |
64 |
< |
* any empty queue to complete. One reason this works well here is |
65 |
< |
* that apparently-nonempty often means soon-to-be-stealable, |
66 |
< |
* which gives threads a chance to activate if necessary before |
67 |
< |
* stealing (see below). |
60 |
> |
* aggregate, we ensure at least probabilistic |
61 |
> |
* non-blockingness. If an attempted steal fails, a thief always |
62 |
> |
* chooses a different random victim target to try next. So, in |
63 |
> |
* order for one thief to progress, it suffices for any |
64 |
> |
* in-progress deq or new push on any empty queue to complete. One |
65 |
> |
* reason this works well here is that apparently-nonempty often |
66 |
> |
* means soon-to-be-stealable, which gives threads a chance to |
67 |
> |
* activate if necessary before stealing (see below). |
68 |
> |
* |
69 |
> |
* This approach also enables support for "async mode" where local |
70 |
> |
* task processing is in FIFO, not LIFO, order, simply by using a |
71 |
> |
* version of deq rather than pop when locallyFifo is true (as set |
72 |
> |
* by the ForkJoinPool). This allows use in message-passing |
73 |
> |
* frameworks in which tasks are never joined. |
74 |
|
* |
75 |
|
* Efficient implementation of this approach currently relies on |
76 |
|
* an uncomfortable amount of "Unsafe" mechanics. To maintain |
80 |
|
* protected by volatile base reads, reads of the queue array and |
81 |
|
* its slots do not need volatile load semantics, but writes (in |
82 |
|
* push) require store order and CASes (in pop and deq) require |
83 |
< |
* (volatile) CAS semantics. Since these combinations aren't |
84 |
< |
* supported using ordinary volatiles, the only way to accomplish |
85 |
< |
* these efficiently is to use direct Unsafe calls. (Using external |
83 |
> |
* (volatile) CAS semantics. (See "Idempotent work stealing" by |
84 |
> |
* Michael, Saraswat, and Vechev, PPoPP 2009 |
85 |
> |
* http://portal.acm.org/citation.cfm?id=1504186 for an algorithm |
86 |
> |
* with similar properties, but without support for nulling |
87 |
> |
* slots.) Since these combinations aren't supported using |
88 |
> |
* ordinary volatiles, the only way to accomplish these |
89 |
> |
* efficiently is to use direct Unsafe calls. (Using external |
90 |
|
* AtomicIntegers and AtomicReferenceArrays for the indices and |
91 |
|
* array is significantly slower because of memory locality and |
92 |
< |
* indirection effects.) Further, performance on most platforms is |
93 |
< |
* very sensitive to placement and sizing of the (resizable) queue |
94 |
< |
* array. Even though these queues don't usually become all that |
95 |
< |
* big, the initial size must be large enough to counteract cache |
92 |
> |
* indirection effects.) |
93 |
> |
* |
94 |
> |
* Further, performance on most platforms is very sensitive to |
95 |
> |
* placement and sizing of the (resizable) queue array. Even |
96 |
> |
* though these queues don't usually become all that big, the |
97 |
> |
* initial size must be large enough to counteract cache |
98 |
|
* contention effects across multiple queues (especially in the |
99 |
|
* presence of GC cardmarking). Also, to improve thread-locality, |
100 |
|
* queues are currently initialized immediately after the thread |
111 |
|
* counter (activeCount) held by the pool. It uses an algorithm |
112 |
|
* similar to that in Herlihy and Shavit section 17.6 to cause |
113 |
|
* threads to eventually block when all threads declare they are |
114 |
< |
* inactive. (See variable "scans".) For this to work, threads |
115 |
< |
* must be declared active when executing tasks, and before |
116 |
< |
* stealing a task. They must be inactive before blocking on the |
117 |
< |
* Pool Barrier (awaiting a new submission or other Pool |
118 |
< |
* event). In between, there is some free play which we take |
119 |
< |
* advantage of to avoid contention and rapid flickering of the |
120 |
< |
* global activeCount: If inactive, we activate only if a victim |
121 |
< |
* queue appears to be nonempty (see above). Similarly, a thread |
122 |
< |
* tries to inactivate only after a full scan of other threads. |
123 |
< |
* The net effect is that contention on activeCount is rarely a |
124 |
< |
* measurable performance issue. (There are also a few other cases |
125 |
< |
* where we scan for work rather than retry/block upon |
113 |
< |
* contention.) |
114 |
> |
* inactive. For this to work, threads must be declared active |
115 |
> |
* when executing tasks, and before stealing a task. They must be |
116 |
> |
* inactive before blocking on the Pool Barrier (awaiting a new |
117 |
> |
* submission or other Pool event). In between, there is some free |
118 |
> |
* play which we take advantage of to avoid contention and rapid |
119 |
> |
* flickering of the global activeCount: If inactive, we activate |
120 |
> |
* only if a victim queue appears to be nonempty (see above). |
121 |
> |
* Similarly, a thread tries to inactivate only after a full scan |
122 |
> |
* of other threads. The net effect is that contention on |
123 |
> |
* activeCount is rarely a measurable performance issue. (There |
124 |
> |
* are also a few other cases where we scan for work rather than |
125 |
> |
* retry/block upon contention.) |
126 |
|
* |
127 |
|
* 3. Selection control. We maintain a policy of always choosing to |
128 |
|
* run local tasks rather than stealing, and always trying to |
277 |
|
final boolean shutdownNow() { return transitionRunStateTo(TERMINATING); } |
278 |
|
|
279 |
|
/** |
280 |
< |
* Transitions to at least the given state. Returns true if not |
281 |
< |
* already at least at given state. |
280 |
> |
* Transitions to at least the given state. |
281 |
> |
* |
282 |
> |
* @return {@code true} if not already at least at given state |
283 |
|
*/ |
284 |
|
private boolean transitionRunStateTo(int state) { |
285 |
|
for (;;) { |
321 |
|
* one. Marsaglia xor-shift is cheap and works well. |
322 |
|
*/ |
323 |
|
private static int xorShift(int r) { |
324 |
< |
r ^= r << 1; |
325 |
< |
r ^= r >>> 3; |
326 |
< |
r ^= r << 10; |
314 |
< |
return r; |
324 |
> |
r ^= (r << 13); |
325 |
> |
r ^= (r >>> 17); |
326 |
> |
return r ^ (r << 5); |
327 |
|
} |
328 |
|
|
329 |
|
// Lifecycle methods |
384 |
|
* {@code super.onTermination} at the end of the overridden method. |
385 |
|
* |
386 |
|
* @param exception the exception causing this thread to abort due |
387 |
< |
* to an unrecoverable error, or null if completed normally |
387 |
> |
* to an unrecoverable error, or {@code null} if completed normally |
388 |
|
*/ |
389 |
|
protected void onTermination(Throwable exception) { |
390 |
|
// Execute remaining local tasks unless aborting or terminating |
391 |
< |
while (exception == null && !pool.isTerminating() && base != sp) { |
391 |
> |
while (exception == null && pool.isProcessingTasks() && base != sp) { |
392 |
|
try { |
393 |
|
ForkJoinTask<?> t = popTask(); |
394 |
|
if (t != null) |
415 |
|
|
416 |
|
// Intrinsics-based support for queue operations. |
417 |
|
|
418 |
+ |
private static long slotOffset(int i) { |
419 |
+ |
return ((long) i << qShift) + qBase; |
420 |
+ |
} |
421 |
+ |
|
422 |
|
/** |
423 |
|
* Sets, in store-order, the given slot of q to the given task. |
424 |
|
* Caller must ensure q is non-null and index is in range. |
425 |
|
*/ |
426 |
|
private static void setSlot(ForkJoinTask<?>[] q, int i, |
427 |
|
ForkJoinTask<?> t) { |
428 |
< |
UNSAFE.putOrderedObject(q, (i << qShift) + qBase, t); |
428 |
> |
UNSAFE.putOrderedObject(q, slotOffset(i), t); |
429 |
|
} |
430 |
|
|
431 |
|
/** |
434 |
|
*/ |
435 |
|
private static boolean casSlotNull(ForkJoinTask<?>[] q, int i, |
436 |
|
ForkJoinTask<?> t) { |
437 |
< |
return UNSAFE.compareAndSwapObject(q, (i << qShift) + qBase, t, null); |
437 |
> |
return UNSAFE.compareAndSwapObject(q, slotOffset(i), t, null); |
438 |
|
} |
439 |
|
|
440 |
|
/** |
485 |
|
} |
486 |
|
|
487 |
|
/** |
488 |
+ |
* Tries to take a task from the base of own queue, activating if |
489 |
+ |
* necessary, failing only if empty. Called only by current thread. |
490 |
+ |
* |
491 |
+ |
* @return a task, or null if none |
492 |
+ |
*/ |
493 |
+ |
final ForkJoinTask<?> locallyDeqTask() { |
494 |
+ |
int b; |
495 |
+ |
while (sp != (b = base)) { |
496 |
+ |
if (tryActivate()) { |
497 |
+ |
ForkJoinTask<?>[] q = queue; |
498 |
+ |
int i = (q.length - 1) & b; |
499 |
+ |
ForkJoinTask<?> t = q[i]; |
500 |
+ |
if (t != null && casSlotNull(q, i, t)) { |
501 |
+ |
base = b + 1; |
502 |
+ |
return t; |
503 |
+ |
} |
504 |
+ |
} |
505 |
+ |
} |
506 |
+ |
return null; |
507 |
+ |
} |
508 |
+ |
|
509 |
+ |
/** |
510 |
|
* Returns a popped task, or null if empty. Ensures active status |
511 |
|
* if non-null. Called only by current thread. |
512 |
|
*/ |
546 |
|
} |
547 |
|
|
548 |
|
/** |
549 |
< |
* Returns next task. |
549 |
> |
* Returns next task, or null if empty or contended. |
550 |
|
*/ |
551 |
|
final ForkJoinTask<?> peekTask() { |
552 |
|
ForkJoinTask<?>[] q = queue; |
637 |
|
* @return a task, if available |
638 |
|
*/ |
639 |
|
final ForkJoinTask<?> pollTask() { |
640 |
< |
ForkJoinTask<?> t = locallyFifo ? deqTask() : popTask(); |
640 |
> |
ForkJoinTask<?> t = locallyFifo ? locallyDeqTask() : popTask(); |
641 |
|
if (t == null && (t = scan()) != null) |
642 |
|
++stealCount; |
643 |
|
return t; |
649 |
|
* @return a task, if available |
650 |
|
*/ |
651 |
|
final ForkJoinTask<?> pollLocalTask() { |
652 |
< |
return locallyFifo ? deqTask() : popTask(); |
652 |
> |
return locallyFifo ? locallyDeqTask() : popTask(); |
653 |
|
} |
654 |
|
|
655 |
|
/** |
684 |
|
* |
685 |
|
* @return the number of tasks drained |
686 |
|
*/ |
687 |
< |
final int drainTasksTo(Collection<ForkJoinTask<?>> c) { |
687 |
> |
final int drainTasksTo(Collection<? super ForkJoinTask<?>> c) { |
688 |
|
int n = 0; |
689 |
|
ForkJoinTask<?> t; |
690 |
|
while (base != sp && (t = deqTask()) != null) { |
705 |
|
} |
706 |
|
|
707 |
|
/** |
708 |
< |
* Returns true if at least one worker in the given array appears |
709 |
< |
* to have at least one queued task. |
708 |
> |
* Returns {@code true} if at least one worker in the given array |
709 |
> |
* appears to have at least one queued task. |
710 |
|
* |
711 |
|
* @param ws array of workers |
712 |
|
*/ |
769 |
|
do {} while (!tryActivate()); // re-activate on exit |
770 |
|
} |
771 |
|
|
772 |
< |
// Unsafe mechanics for jsr166y 3rd party package. |
772 |
> |
// Unsafe mechanics |
773 |
> |
|
774 |
> |
private static final sun.misc.Unsafe UNSAFE = getUnsafe(); |
775 |
> |
private static final long spOffset = |
776 |
> |
objectFieldOffset("sp", ForkJoinWorkerThread.class); |
777 |
> |
private static final long runStateOffset = |
778 |
> |
objectFieldOffset("runState", ForkJoinWorkerThread.class); |
779 |
> |
private static final long qBase; |
780 |
> |
private static final int qShift; |
781 |
> |
|
782 |
> |
static { |
783 |
> |
qBase = UNSAFE.arrayBaseOffset(ForkJoinTask[].class); |
784 |
> |
int s = UNSAFE.arrayIndexScale(ForkJoinTask[].class); |
785 |
> |
if ((s & (s-1)) != 0) |
786 |
> |
throw new Error("data type scale not a power of two"); |
787 |
> |
qShift = 31 - Integer.numberOfLeadingZeros(s); |
788 |
> |
} |
789 |
> |
|
790 |
> |
private static long objectFieldOffset(String field, Class<?> klazz) { |
791 |
> |
try { |
792 |
> |
return UNSAFE.objectFieldOffset(klazz.getDeclaredField(field)); |
793 |
> |
} catch (NoSuchFieldException e) { |
794 |
> |
// Convert Exception to corresponding Error |
795 |
> |
NoSuchFieldError error = new NoSuchFieldError(field); |
796 |
> |
error.initCause(e); |
797 |
> |
throw error; |
798 |
> |
} |
799 |
> |
} |
800 |
> |
|
801 |
> |
/** |
802 |
> |
* Returns a sun.misc.Unsafe. Suitable for use in a 3rd party package. |
803 |
> |
* Replace with a simple call to Unsafe.getUnsafe when integrating |
804 |
> |
* into a JDK. |
805 |
> |
* |
806 |
> |
* @return a sun.misc.Unsafe |
807 |
> |
*/ |
808 |
|
private static sun.misc.Unsafe getUnsafe() { |
809 |
|
try { |
810 |
|
return sun.misc.Unsafe.getUnsafe(); |
811 |
|
} catch (SecurityException se) { |
812 |
|
try { |
813 |
|
return java.security.AccessController.doPrivileged |
814 |
< |
(new java.security.PrivilegedExceptionAction<sun.misc.Unsafe>() { |
814 |
> |
(new java.security |
815 |
> |
.PrivilegedExceptionAction<sun.misc.Unsafe>() { |
816 |
|
public sun.misc.Unsafe run() throws Exception { |
817 |
< |
return getUnsafeByReflection(); |
817 |
> |
java.lang.reflect.Field f = sun.misc |
818 |
> |
.Unsafe.class.getDeclaredField("theUnsafe"); |
819 |
> |
f.setAccessible(true); |
820 |
> |
return (sun.misc.Unsafe) f.get(null); |
821 |
|
}}); |
822 |
|
} catch (java.security.PrivilegedActionException e) { |
823 |
|
throw new RuntimeException("Could not initialize intrinsics", |
825 |
|
} |
826 |
|
} |
827 |
|
} |
751 |
– |
|
752 |
– |
private static sun.misc.Unsafe getUnsafeByReflection() |
753 |
– |
throws NoSuchFieldException, IllegalAccessException { |
754 |
– |
java.lang.reflect.Field f = |
755 |
– |
sun.misc.Unsafe.class.getDeclaredField("theUnsafe"); |
756 |
– |
f.setAccessible(true); |
757 |
– |
return (sun.misc.Unsafe) f.get(null); |
758 |
– |
} |
759 |
– |
|
760 |
– |
private static long fieldOffset(String fieldName, Class<?> klazz) { |
761 |
– |
try { |
762 |
– |
return UNSAFE.objectFieldOffset(klazz.getDeclaredField(fieldName)); |
763 |
– |
} catch (NoSuchFieldException e) { |
764 |
– |
// Convert Exception to Error |
765 |
– |
NoSuchFieldError error = new NoSuchFieldError(fieldName); |
766 |
– |
error.initCause(e); |
767 |
– |
throw error; |
768 |
– |
} |
769 |
– |
} |
770 |
– |
|
771 |
– |
private static final sun.misc.Unsafe UNSAFE = getUnsafe(); |
772 |
– |
static final long baseOffset = |
773 |
– |
fieldOffset("base", ForkJoinWorkerThread.class); |
774 |
– |
static final long spOffset = |
775 |
– |
fieldOffset("sp", ForkJoinWorkerThread.class); |
776 |
– |
static final long runStateOffset = |
777 |
– |
fieldOffset("runState", ForkJoinWorkerThread.class); |
778 |
– |
static final long qBase; |
779 |
– |
static final int qShift; |
780 |
– |
|
781 |
– |
static { |
782 |
– |
qBase = UNSAFE.arrayBaseOffset(ForkJoinTask[].class); |
783 |
– |
int s = UNSAFE.arrayIndexScale(ForkJoinTask[].class); |
784 |
– |
if ((s & (s-1)) != 0) |
785 |
– |
throw new Error("data type scale not a power of two"); |
786 |
– |
qShift = 31 - Integer.numberOfLeadingZeros(s); |
787 |
– |
} |
828 |
|
} |