ViewVC Help
View File | Revision Log | Show Annotations | Download File | Root Listing
root/jsr166/jsr166/src/main/java/util/concurrent/ForkJoinPool.java
(Generate patch)

Comparing jsr166/src/main/java/util/concurrent/ForkJoinPool.java (file contents):
Revision 1.404 by dl, Sun Nov 14 16:19:13 2021 UTC vs.
Revision 1.405 by dl, Fri Mar 18 16:01:42 2022 UTC

# Line 7 | Line 7
7   package java.util.concurrent;
8  
9   import java.lang.Thread.UncaughtExceptionHandler;
10 import java.lang.invoke.VarHandle;
11 import java.lang.reflect.Field;
10   import java.security.AccessController;
11   import java.security.AccessControlContext;
12   import java.security.Permission;
# Line 25 | Line 23 | import java.util.concurrent.locks.LockSu
23   import java.util.concurrent.locks.ReentrantLock;
24   import java.util.concurrent.locks.Condition;
25   import jdk.internal.misc.Unsafe;
26 + //import jdk.internal.vm.SharedThreadContainer; // for loom
27  
28   /**
29   * An {@link ExecutorService} for running {@link ForkJoinTask}s.
# Line 174 | Line 173 | public class ForkJoinPool extends Abstra
173       * internal methods and nested classes are interrelated, their
174       * main rationale and descriptions are presented here; individual
175       * methods and nested classes contain only brief comments about
176 <     * details.
176 >     * details. There are a fair number of odd code constructions and
177 >     * design decisions for components that reside at the edge of Java
178 >     * vs JVM functionality.
179       *
180       * WorkQueues
181       * ==========
# Line 199 | Line 200 | public class ForkJoinPool extends Abstra
200       * a footprint as possible even in programs generating huge
201       * numbers of tasks. To accomplish this, we shift the CAS
202       * arbitrating pop vs poll (steal) from being on the indices
203 <     * ("base" and "top") to the slots themselves.
203 >     * ("base" and "top") to the slots themselves. These provide the
204 >     * primary required memory ordering -- see "Correct and Efficient
205 >     * Work-Stealing for Weak Memory Models" by Le, Pop, Cohen, and
206 >     * Nardelli, PPoPP 2013
207 >     * (http://www.di.ens.fr/~zappa/readings/ppopp13.pdf) for an
208 >     * analysis of memory ordering requirements in work-stealing
209 >     * algorithms similar to the one used here. We also use ordered,
210 >     * moded accesses and/or fences for other control, with modes
211 >     * reflecting the presence or absence of other contextual sync
212 >     * provided by atomic and/or volatile accesses. Some methods (or
213 >     * their primary loops) begin with an acquire fence that amounts
214 >     * to an acquiring read of "this" to cover all fields (which is
215 >     * sometimes stronger than necessary, but less brittle). Some
216 >     * constructions are intentionally racy because they use read
217 >     * values as hints, not for correctness.
218 >     *
219 >     * We also support a user mode in which local task processing is
220 >     * in FIFO, not LIFO order, simply by using a local version of
221 >     * poll rather than pop.  This can be useful in message-passing
222 >     * frameworks in which tasks are never joined, although with
223 >     * increased contention among task producers and consumers. Also,
224 >     * the same data structure (and class) is used for "submission
225 >     * queues" (described below) holding externally submitted tasks,
226 >     * that differ only in that a lock (field "access"; see below) is
227 >     * required by external callers to push and pop tasks.
228       *
229       * Adding tasks then takes the form of a classic array push(task)
230       * in a circular buffer:
# Line 208 | Line 233 | public class ForkJoinPool extends Abstra
233       * The actual code needs to null-check and size-check the array,
234       * uses masking, not mod, for indexing a power-of-two-sized array,
235       * enforces memory ordering, supports resizing, and possibly
236 <     * signals waiting workers to start scanning -- see below.
236 >     * signals waiting workers to start scanning (described below),
237 >     * which requires that even internal usages strictly order
238 >     * accesses (using a form of lock release).
239       *
240       * The pop operation (always performed by owner) is of the form:
241       *   if ((task = getAndSet(q.array, (q.top-1) % length, null)) != null)
242       *        decrement top and return task;
243 <     * If this fails, the queue is empty.
243 >     * If this fails, the queue is empty. This operation is one part
244 >     * of the nextLocalTask method, that instead does a local-poll
245 >     * in FIFO mode.
246       *
247 <     * The poll operation by another stealer thread is, basically:
248 <     *   if (CAS nonnull task at q.array[q.base % length] to null)
247 >     * The poll operation is, basically:
248 >     *   if (CAS nonnull task t = q.array[k = q.base % length] to null)
249       *       increment base and return task;
250       *
251 <     * This may fail due to contention, and may be retried.
252 <     * Implementations must ensure a consistent snapshot of the base
253 <     * index and the task (by looping or trying elsewhere) before
254 <     * trying CAS.  There isn't actually a method of this form,
255 <     * because failure due to inconsistency or contention is handled
256 <     * in different ways in different contexts, normally by first
257 <     * trying other queues. (For the most straightforward example, see
258 <     * method pollScan.) There are further variants for cases
259 <     * requiring inspection of elements before extracting them, so
260 <     * must interleave these with variants of this code.  Also, a more
261 <     * efficient version (nextLocalTask) is used for polls by owners.
262 <     * It avoids some overhead because the queue cannot be growing
263 <     * during call.
264 <     *
265 <     * Memory ordering.  See "Correct and Efficient Work-Stealing for
266 <     * Weak Memory Models" by Le, Pop, Cohen, and Nardelli, PPoPP 2013
267 <     * (http://www.di.ens.fr/~zappa/readings/ppopp13.pdf) for an
268 <     * analysis of memory ordering requirements in work-stealing
269 <     * algorithms similar to the one used here.  Inserting and
270 <     * extracting tasks in array slots via volatile or atomic accesses
271 <     * or explicit fences provides primary synchronization.
272 <     *
273 <     * Operations on deque elements require reads and writes of both
274 <     * indices and slots. When possible, we allow these to occur in
275 <     * any order.  Because the base and top indices (along with other
276 <     * pool or array fields accessed in many methods) only imprecisely
277 <     * guide where to extract from, we let accesses other than the
278 <     * element getAndSet/CAS/setVolatile appear in any order, using
279 <     * plain mode. But we must still preface some methods (mainly
280 <     * those that may be accessed externally) with an acquireFence to
281 <     * avoid unbounded staleness. This is equivalent to acting as if
282 <     * callers use an acquiring read of the reference to the pool or
283 <     * queue when invoking the method, even when they do not. We use
284 <     * explicit acquiring reads (getSlot) rather than plain array
285 <     * access when acquire mode is required but not otherwise ensured
286 <     * by context. To reduce stalls by other stealers, we encourage
287 <     * timely writes to the base index by immediately following
288 <     * updates with a write of a volatile field that must be updated
289 <     * anyway, or an Opaque-mode write if there is no such
290 <     * opportunity.
291 <     *
292 <     * Because indices and slot contents cannot always be consistent,
293 <     * the emptiness check base == top is only quiescently accurate
294 <     * (and so used where this suffices). Otherwise, it may err on the
295 <     * side of possibly making the queue appear nonempty when a push,
296 <     * pop, or poll have not fully committed, or making it appear
297 <     * empty when an update of top or base has not yet been seen.
298 <     * Similarly, the check in push for the queue array being full may
299 <     * trigger when not completely full, causing a resize earlier than
300 <     * required.
301 <     *
302 <     * Mainly because of these potential inconsistencies among slots
303 <     * vs indices, the poll operation, considered individually, is not
304 <     * wait-free. One thief cannot successfully continue until another
305 <     * in-progress one (or, if previously empty, a push) visibly
306 <     * completes.  This can stall threads when required to consume
307 <     * from a given queue (which may spin).  However, in the
308 <     * aggregate, we ensure probabilistic non-blockingness at least
309 <     * until checking quiescence (which is intrinsically blocking):
310 <     * If an attempted steal fails, a scanning thief chooses a
311 <     * different victim target to try next. So, in order for one thief
312 <     * to progress, it suffices for any in-progress poll or new push
313 <     * on any empty queue to complete. The worst cases occur when many
314 <     * threads are looking for tasks being produced by a stalled
315 <     * producer.
316 <     *
317 <     * This approach also enables support of a user mode in which
318 <     * local task processing is in FIFO, not LIFO order, simply by
319 <     * using poll rather than pop.  This can be useful in
320 <     * message-passing frameworks in which tasks are never joined,
292 <     * although with increased contention among task producers and
293 <     * consumers.
251 >     * However, there are several more cases that must be dealt with.
252 >     * Some of them are just due to asynchrony; others reflect
253 >     * contention and stealing policies. Stepping through them
254 >     * illustrates some of the implementation decisions in this class.
255 >     *
256 >     *  * Slot k must be read with an acquiring read, which it must
257 >     *    anyway to dereference and run the task if the (acquiring)
258 >     *    CAS succeeds, but uses an explicit acquire fence to support
259 >     *    the following rechecks even if the CAS is not attempted.
260 >     *
261 >     *  * q.base may change between reading and using its value to
262 >     *    index the slot. To avoid trying to use the wrong t, the
263 >     *    index and slot must be reread (not necessarily immediately)
264 >     *    until consistent, unless this is a local poll by owner, in
265 >     *    which case this form of inconsistency can only appear as t
266 >     *    being null, below.
267 >     *
268 >     *  * Similarly, q.array may change (due to a resize), unless this
269 >     *    is a local poll by owner. Otherwise, when t is present, this
270 >     *    only needs consideration on CAS failure (since a CAS
271 >     *    confirms the non-resized case.)
272 >     *
273 >     *  * t may appear null because a previous poll operation has not
274 >     *    yet incremented q.base, so the read is from an already-taken
275 >     *    index. This form of stall reflects the non-lock-freedom of
276 >     *    the poll operation. Stalls can be detected by observing that
277 >     *    q.base doesn't change on repeated reads of null t and when
278 >     *    no other alternatives apply, spin-wait for it to settle.  To
279 >     *    reduce producing these kinds of stalls by other stealers, we
280 >     *    encourage timely writes to indices using store fences when
281 >     *    memory ordering is not already constrained by context.
282 >     *
283 >     *  * The CAS may fail, in which case we may want to retry unless
284 >     *    there is too much contention. One goal is to balance and
285 >     *    spread out the many forms of contention that may be
286 >     *    encountered across polling and other operations to avoid
287 >     *    sustained performance degradations. Across all cases where
288 >     *    alternatives exist, a bounded number of CAS misses or stalls
289 >     *    are tolerated (for slots, ctl, and elsewhere described
290 >     *    below) before taking alternative action. These may move
291 >     *    contention or retries elsewhere, which is still preferable
292 >     *    to single-point bottlenecks.
293 >     *
294 >     *  * Even though the check "top == base" is quiescently accurate
295 >     *    to determine whether a queue is empty, it is not of much use
296 >     *    when deciding whether to try to poll or repoll after a
297 >     *    failure.  Both top and base may move independently, and both
298 >     *    lag updates to the underlying array. To reduce memory
299 >     *    contention, when possible, non-owners avoid reading the
300 >     *    "top" index at all, and instead use array reads, including
301 >     *    one-ahead reads to check whether to repoll, relying on the
302 >     * fact that a non-empty queue does not have two null slots in
303 >     *    a row, except in cases (resizes and shifts) that can be
304 >     *    detected with a secondary recheck.
305 >     *
306 >     * The poll operations in q.poll(), scan(), helpJoin(), and
307 >     * elsewhere differ with respect to whether other queues are
308 >     * available to try, and the presence or nature of screening steps
309 >     * when only some kinds of tasks can be taken. When alternatives
310 >     * (or failing) are an option, they uniformly give up after
311 >     * bounded numbers of stalls and/or CAS failures, which reduces
312 >     * contention when too many workers are polling too few tasks.
313 >     * Overall, in the aggregate, we ensure probabilistic
314 >     * non-blockingness of work-stealing at least until checking
315 >     * quiescence (which is intrinsically blocking): If an attempted
316 >     * steal fails in these ways, a scanning thief chooses a different
317 >     * target to try next. In contexts where alternatives aren't
318 >     * available, and when progress conditions can be isolated to
319 >     * values of a single variable, simple spinloops (using
320 >     * Thread.onSpinWait) are used to reduce memory traffic.
321       *
322       * WorkQueues are also used in a similar way for tasks submitted
323       * to the pool. We cannot mix these tasks in the same queues used
# Line 302 | Line 329 | public class ForkJoinPool extends Abstra
329       * like workers except that they are restricted to executing local
330       * tasks that they submitted (or when known, subtasks thereof).
331       * Insertion of tasks in shared mode requires a lock. We use only
332 <     * a simple spinlock (using field "source"), because submitters
333 <     * encountering a busy queue move to a different position to use
334 <     * or create other queues. They block only when registering new
335 <     * queues.
332 >     * a simple spinlock because submitters encountering a busy queue
333 >     * move to a different position to use or create other queues.
334 >     * They (spin) block only when registering new queues, and less
335 >     * often in tryRemove and helpComplete.  The lock needed for
336 >     * external queues is generalized (as field "access") for
337 >     * operations on owned queues that require a fully-fenced write
338 >     * (including push, parking status, and termination) in order to
339 >     * deal with Dekker-like signalling constructs described below.
340       *
341       * Management
342       * ==========
# Line 321 | Line 352 | public class ForkJoinPool extends Abstra
352       * can globally track or maintain, so we pack them into a small
353       * number of variables, often maintaining atomicity without
354       * blocking or locking.  Nearly all essentially atomic control
355 <     * state is held in a few volatile variables that are by far most
356 <     * often read (not written) as status and consistency checks. We
357 <     * pack as much information into them as we can.
355 >     * state is held in a few variables that are by far most often
356 >     * read (not written) as status and consistency checks. We pack as
357 >     * much information into them as we can.
358       *
359       * Field "ctl" contains 64 bits holding information needed to
360       * atomically decide to add, enqueue (on an event queue), and
# Line 331 | Line 362 | public class ForkJoinPool extends Abstra
362       * restrict maximum parallelism to (1<<15)-1 (which is far in
363       * excess of normal operating range) to allow ids, counts, and
364       * their negations (used for thresholding) to fit into 16bit
365 <     * subfields.
366 <     *
367 <     * Field "mode" holds configuration parameters as well as lifetime
368 <     * status, atomically and monotonically setting SHUTDOWN, STOP,
369 <     * and finally TERMINATED bits. It is updated only via bitwise
370 <     * atomics (getAndBitwiseOr).
365 >     * subfields. Field "parallelism" holds the target parallelism
366 >     * (normally corresponding to pool size). It is needed (nearly)
367 >     * only in methods updating ctl, so is packed nearby. As of the
368 >     * current release, users can dynamically reset target
369 >     * parallelism, which is read once per update, so only slowly has
370 >     * an effect in creating threads or letting them time out and
371 >     * terminate when idle.
372 >     *
373 >     * Field "mode" mainly holds lifetime status, atomically and
374 >     * monotonically setting SHUTDOWN, STOP, and finally TERMINATED
375 >     * bits. It is updated only via bitwise atomics (getAndBitwiseOr).
376       *
377       * Array "queues" holds references to WorkQueues.  It is updated
378       * (only during worker creation and termination) under the
379 <     * registrationLock, but is otherwise concurrently readable, and
380 <     * accessed directly (although always prefaced by acquireFences or
381 <     * other acquiring reads). To simplify index-based operations, the
382 <     * array size is always a power of two, and all readers must
383 <     * tolerate null slots.  Worker queues are at odd indices. Worker
384 <     * ids masked with SMASK match their index. Shared (submission)
385 <     * queues are at even indices. Grouping them together in this way
386 <     * simplifies and speeds up task scanning.
379 >     * registrationLock, but is otherwise concurrently readable (often
380 >     * prefaced by a volatile read of mode to check termination, that
381 >     * is required anyway, and serves as an acquire fence). To
382 >     * simplify index-based operations, the array size is always a
383 >     * power of two, and all readers must tolerate null slots.  Worker
384 >     * queues are at odd indices. Worker ids masked with SMASK match
385 >     * their index. Shared (submission) queues are at even
386 >     * indices. Grouping them together in this way simplifies and
387 >     * speeds up task scanning.
388       *
389       * All worker thread creation is on-demand, triggered by task
390       * submissions, replacement of terminated workers, and/or
391       * compensation for blocked workers. However, all other support
392       * code is set up to work with other policies.  To ensure that we
393       * do not hold on to worker or task references that would prevent
394 <     * GC, all accesses to workQueues are via indices into the
395 <     * queues array (which is one source of some of the messy code
396 <     * constructions here). In essence, the queues array serves as
397 <     * a weak reference mechanism. Thus for example the stack top
398 <     * subfield of ctl stores indices, not references.
394 >     * GC, all accesses to workQueues in waiting, signalling, and
395 >     * control methods are via indices into the queues array (which is
396 >     * one source of some of the messy code constructions here). In
397 >     * essence, the queues array serves as a weak reference
398 >     * mechanism. In particular, the stack top subfield of ctl stores
399 >     * indices, not references.
400       *
401       * Queuing Idle Workers. Unlike HPC work-stealing frameworks, we
402       * cannot let workers spin indefinitely scanning for tasks when
# Line 403 | Line 441 | public class ForkJoinPool extends Abstra
441       * to some external caller.
442       *
443       * WorkQueue field "phase" is used by both workers and the pool to
444 <     * manage and track whether a worker is UNSIGNALLED (possibly
445 <     * blocked waiting for a signal).  When a worker is enqueued its
446 <     * phase field is set negative. Note that phase field updates lag
447 <     * queue CAS releases; seeing a negative phase does not guarantee
448 <     * that the worker is available. When queued, the lower 16 bits of
449 <     * its phase must hold its pool index. So we place the index there
450 <     * upon initialization and never modify these bits.
444 >     * manage and track whether a worker is unsignalled (possibly
445 >     * blocked waiting for a signal), conveniently using the sign bit
446 >     * to check.  When a worker is enqueued its phase field is set
447 >     * negative. Note that phase field updates lag queue CAS releases;
448 >     * seeing a negative phase does not guarantee that the worker is
449 >     * available (and so is never checked in this way). When queued,
450 >     * the lower 16 bits of its phase must hold its pool index. So we
451 >     * place the index there upon initialization and never modify
452 >     * these bits.
453       *
454       * The ctl field also serves as the basis for memory
455       * synchronization surrounding activation. This uses a more
# Line 418 | Line 458 | public class ForkJoinPool extends Abstra
458       * if to its current value).  However, rather than CASing ctl to
459       * its current value in the common case where no action is
460       * required, we reduce write contention by ensuring that
461 <     * signalWork invocations are prefaced with a full-volatile memory
461 >     * signalWork invocations are prefaced with a fully fenced memory
462       * access (which is usually needed anyway).
463       *
464       * Signalling. Signals (in signalWork) cause new or reactivated
# Line 429 | Line 469 | public class ForkJoinPool extends Abstra
469       * computations are purely tree structured, it suffices for every
470       * worker to activate another when it pushes a task into an empty
471       * queue, resulting in O(log(#threads)) steps to full activation.
472 <     * If instead, tasks come in serially from only a single producer,
473 <     * each worker taking its first (since the last quiescence) task
472 >     * (To reduce resource usages in some cases, at the expense of
473 >     * slower startup in others, activation of an idle thread is
474 >     * preferred over creating a new one, here and elsewhere.)  If
475 >     * instead, tasks come in serially from only a single producer,
476 >     * each worker taking its first (since the last activation) task
477       * from a queue should signal another if there are more tasks in
478       * that queue. This is equivalent to, but generally faster than,
479       * arranging the stealer take two tasks, re-pushing one on its own
480       * queue, and signalling (because its queue is empty), also
481       * resulting in logarithmic full activation time. Because we don't
482       * know about usage patterns (or most commonly, mixtures), we use
483 <     * both approaches.  We approximate the second rule by arranging
484 <     * that workers in scan() do not repeat signals when repeatedly
485 <     * taking tasks from any given queue, by remembering the previous
486 <     * one. There are narrow windows in which both rules may apply,
487 <     * leading to duplicate or unnecessary signals. Despite such
488 <     * limitations, these rules usually avoid slowdowns that otherwise
489 <     * occur when too many workers contend to take too few tasks, or
490 <     * when producers waste most of their time resignalling.  However,
491 <     * contention and overhead effects may still occur during ramp-up,
449 <     * ramp-down, and small computations involving only a few workers.
483 >     * both approaches. Together these are minimally necessary for
484 >     * maintaining liveness. However, they do not account for the fact
485 >     * that when tasks are short-lived, signals are unnecessary
486 >     * because workers will already be scanning for new tasks without
487 >     * the need of new signals. We track these cases (variable
488 >     * "prevSrc" in scan() and related methods) to avoid some
489 >     * unnecessary signals and scans.  However, signal contention and
490 >     * overhead effects may still occur during ramp-up, ramp-down, and
491 >     * small computations involving only a few workers.
492       *
493       * Scanning. Method scan performs top-level scanning for (and
494 <     * execution of) tasks.  Scans by different workers and/or at
495 <     * different times are unlikely to poll queues in the same
496 <     * order. Each scan traverses and tries to poll from each queue in
497 <     * a pseudorandom permutation order by starting at a random index,
498 <     * and using a constant cyclically exhaustive stride; restarting
499 <     * upon contention.  (Non-top-level scans; for example in
500 <     * helpJoin, use simpler linear probes because they do not
501 <     * systematically contend with top-level scans.)  The pseudorandom
502 <     * generator need not have high-quality statistical properties in
503 <     * the long term. We use Marsaglia XorShifts, seeded with the Weyl
504 <     * sequence from ThreadLocalRandom probes, which are cheap and
505 <     * suffice. Scans do not otherwise explicitly take into account
506 <     * core affinities, loads, cache localities, etc, However, they do
507 <     * exploit temporal locality (which usually approximates these) by
508 <     * preferring to re-poll from the same queue after a successful
509 <     * poll before trying others (see method topLevelExec).  This
510 <     * reduces fairness, which is partially counteracted by using a
511 <     * one-shot form of poll (tryPoll) that may lose to other workers.
512 <     *
513 <     * Deactivation. Method scan returns a sentinel when no tasks are
514 <     * found, leading to deactivation (see awaitWork). The count
515 <     * fields in ctl allow accurate discovery of quiescent states
516 <     * (i.e., when all workers are idle) after deactivation. However,
517 <     * this may also race with new (external) submissions, so a
518 <     * recheck is also needed to determine quiescence. Upon apparently
519 <     * triggering quiescence, awaitWork re-scans and self-signals if
520 <     * it may have missed a signal. In other cases, a missed signal
521 <     * may transiently lower parallelism because deactivation does not
522 <     * necessarily mean that there is no more work, only that that
523 <     * there were no tasks not taken by other workers.  But more
524 <     * signals are generated (see above) to eventually reactivate if
525 <     * needed.
526 <     *
527 <     * Trimming workers. To release resources after periods of lack of
528 <     * use, a worker starting to wait when the pool is quiescent will
529 <     * time out and terminate if the pool has remained quiescent for
530 <     * period given by field keepAlive.
494 >     * execution of) tasks by polling a pseudo-random permutation of
495 >     * the array (by starting at a random index, and using a constant
496 >     * cyclically exhaustive stride.) It uses the same basic polling
497 >     * method as WorkQueue.poll(), but restarts with a different
498 >     * permutation on each invocation. (Non-top-level scans; for
499 >     * example in helpJoin, use simpler and faster linear probes
500 >     * because they do not systematically contend with top-level
501 >     * scans.)  The pseudorandom generator need not have high-quality
502 >     * statistical properties in the long term. We use Marsaglia
503 >     * XorShifts, seeded with the Weyl sequence from ThreadLocalRandom
504 >     * probes, which are cheap and suffice. Scans do not otherwise
505 >     * explicitly take into account core affinities, loads, cache
506 >     * localities, etc, However, they do exploit temporal locality
507 >     * (which usually approximates these) by preferring to re-poll
508 >     * from the same queue (using method tryPoll()) after a successful
509 >     * poll before trying others (see method topLevelExec), which also
510 >     * reduces bookkeeping and scanning overhead.  This also reduces
511 >     * fairness, which is partially counteracted by giving up on
512 >     * contention.
513 >     *
514 >     * Deactivation. When method scan indicates that no tasks are
515 >     * found by a worker, it deactivates (see awaitWork).  Note that
516 >     * not finding tasks doesn't mean that there won't soon be
517 >     * some. Further, a scan may give up under contention, returning
518 >     * even without knowing whether any tasks are still present, which
519 >     * is OK, given the above signalling rules that will eventually
520 >     * maintain progress.  Blocking and unblocking via park/unpark can
521 >     * cause serious slowdowns when tasks are rapidly but irregularly
522 >     * generated (which is often due to garbage collectors and other
523 >     * activities). One way to ameliorate is for workers to rescan
524 >     * multiple times, even when there are unlikely to be tasks. But
525 >     * this causes enough memory and CAS contention to prefer using
526 >     * quieter spinwaits in awaitWork; currently set to small values
527 >     * that only cover near-miss scenarios for deactivate vs activate
528 >     * races. Because idle workers are often not yet blocked (via
529 >     * LockSupport.park), we use the WorkQueue access field to
530 >     * advertise that a waiter actually needs unparking upon signal.
531 >     *
532 >     * When idle workers are not continually woken up, the count
533 >     * fields in ctl allow efficient and accurate discovery of
534 >     * quiescent states (i.e., when all workers are idle) after
535 >     * deactivation. However, this voting mechanism alone does not
536 >     * guarantee that a pool can become dormant (quiesced or
537 >     * terminated), because external racing producers do not vote, and
538 >     * can asynchronously submit new tasks. To deal with this, the
539 >     * final unparked thread (in awaitWork) scans external queues to
540 >     * check for tasks that could have been added during a race window
541 >     * that would not be accompanied by a signal, in which case
542 >     * re-activating itself (or any other worker) to recheck. The same
543 >     * sets of checks are used in tryTerminate, to correctly trigger
544 >     * delayed termination (shutDown, followed by quiescence) in the
545 >     * presence of racing submissions. In all cases, the notion of the
546 >     * "final" unparked thread is an approximation, because new
547 >     * workers could be in the process of being constructed, which
548 >     * occasionally adds some extra unnecessary processing.
549       *
550       * Shutdown and Termination. A call to shutdownNow invokes
551       * tryTerminate to atomically set a mode bit. The calling thread,
552       * as well as every other worker thereafter terminating, helps
553       * terminate others by cancelling their unprocessed tasks, and
554 <     * waking them up. Calls to non-abrupt shutdown() preface this by
555 <     * checking isQuiescent before triggering the "STOP" phase of
556 <     * termination. To conform to ExecutorService invoke, invokeAll,
557 <     * and invokeAny specs, we must track pool status while waiting,
558 <     * and interrupt interruptible callers on termination (see
559 <     * ForkJoinTask.joinForPoolInvoke etc).
554 >     * interrupting other workers. Calls to non-abrupt shutdown()
555 >     * preface this by checking isQuiescent before triggering the
556 >     * "STOP" phase of termination. During termination, workers are
557 >     * stopped using all three of (often in parallel): releasing via
558 >     * ctl (method reactivate), interrupts, and cancelling tasks that
559 >     * will cause workers to not find work and exit. To support this,
560 >     * worker references are not removed from the queues array during
561 >     * termination. It is possible for late thread creations to still
562 >     * be in progress after a quiescent termination reports terminated
563 >     * status, but they will also immediately terminate. To conform to
564 >     * ExecutorService invoke, invokeAll, and invokeAny specs, we must
565 >     * track pool status while waiting in ForkJoinTask.awaitDone, and
566 >     * interrupt interruptible callers on termination.
567 >     *
568 >     * Trimming workers. To release resources after periods of lack of
569 >     * use, a worker starting to wait when the pool is quiescent will
570 >     * time out and terminate if the pool has remained quiescent for
571 >     * the period given by field keepAlive.
572       *
573       * Joining Tasks
574       * =============
575       *
576       * Normally, the first option when joining a task that is not done
577 <     * is to try to unfork it from local queue and run it.  Otherwise,
577 >     * is to try to take it from local queue and run it.  Otherwise,
578       * any of several actions may be taken when one worker is waiting
579       * to join a task stolen (or always held) by another.  Because we
580       * are multiplexing many tasks on to a pool of workers, we can't
# Line 525 | Line 597 | public class ForkJoinPool extends Abstra
597       * possible action of a compensator is to steal and execute the
598       * task being joined, the joining thread can do so directly,
599       * without the need for a compensation thread; although with a
600 <     * (rare) possibility of reduced parallelism because of a
601 <     * transient gap in the queue array.
600 >     * possibility of reduced parallelism because of a transient gap
601 >     * in the queue array that stalls stealers.
602       *
603       * Other intermediate forms available for specific task types (for
604       * example helpAsyncBlocker) often avoid or postpone the need for
# Line 536 | Line 608 | public class ForkJoinPool extends Abstra
608       * only on compensation in method awaitBlocker.
609       *
610       * The algorithm in helpJoin entails a form of "linear helping".
611 <     * Each worker records (in field "source") the id of the queue
612 <     * from which it last stole a task.  The scan in method helpJoin
613 <     * uses these markers to try to find a worker to help (i.e., steal
614 <     * back a task from and execute it) that could hasten completion
615 <     * of the actively joined task.  Thus, the joiner executes a task
616 <     * that would be on its own local deque if the to-be-joined task
617 <     * had not been stolen. This is a conservative variant of the
618 <     * approach described in Wagner & Calder "Leapfrogging: a portable
619 <     * technique for implementing efficient futures" SIGPLAN Notices,
620 <     * 1993 (http://portal.acm.org/citation.cfm?id=155354). It differs
621 <     * mainly in that we only record queue ids, not full dependency
611 >     * Each worker records (in field "source") a reference to the
612 >     * queue from which it last stole a task.  The scan in method
613 >     * helpJoin uses these markers to try to find a worker to help
614 >     * (i.e., steal back a task from and execute it) that could hasten
615 >     * completion of the actively joined task.  Thus, the joiner
616 >     * executes a task that would be on its own local deque if the
617 >     * to-be-joined task had not been stolen. This is a conservative
618 >     * variant of the approach described in Wagner & Calder
619 >     * "Leapfrogging: a portable technique for implementing efficient
620 >     * futures" SIGPLAN Notices, 1993
621 >     * (http://portal.acm.org/citation.cfm?id=155354). It differs
622 >     * mainly in that we only record queues, not full dependency
623       * links.  This requires a linear scan of the queues array to
624       * locate stealers, but isolates cost to when it is needed, rather
625 <     * than adding to per-task overhead. Also, searches are limited to
626 <     * direct and at most two levels of indirect stealers, after which
627 <     * there are rapidly diminishing returns on increased overhead.
628 <     * Searches can fail to locate stealers when stalls delay
629 <     * recording sources.  Further, even when accurately identified,
625 >     * than adding to per-task overhead.  For CountedCompleters, the
626 >     * analogous method helpComplete doesn't need stealer-tracking,
627 >     * but requires a similar check of completion chains.
628 >     *
629 >     * In either case, searches can fail to locate stealers when
630 >     * stalls delay recording sources. We avoid some of these cases by
631 >     * using snapshotted values of ctl as a check that the numbers of
632 >     * workers are not changing.  But even when accurately identified,
633       * stealers might not ever produce a task that the joiner can in
634       * turn help with. So, compensation is tried upon failure to find
635       * tasks to run.
636       *
561     * Joining CountedCompleters (see helpComplete) differs from (and
562     * is generally more efficient than) other cases because task
563     * eligibility is determined by checking completion chains rather
564     * than tracking stealers.
565     *
566     * Joining under timeouts (ForkJoinTask timed get) uses a
567     * constrained mixture of helping and compensating in part because
568     * pools (actually, only the common pool) may not have any
569     * available threads: If the pool is saturated (all available
570     * workers are busy), the caller tries to remove and otherwise
571     * help; else it blocks under compensation so that it may time out
572     * independently of any tasks.
573     *
637       * Compensation does not by default aim to keep exactly the target
638       * parallelism number of unblocked threads running at any given
639       * time. Some previous versions of this class employed immediate
# Line 590 | Line 653 | public class ForkJoinPool extends Abstra
653       * The static common pool always exists after static
654       * initialization.  Since it (or any other created pool) need
655       * never be used, we minimize initial construction overhead and
656 <     * footprint to the setup of about a dozen fields.
657 <     *
658 <     * When external threads submit to the common pool, they can
659 <     * perform subtask processing (see helpComplete and related
660 <     * methods) upon joins.  This caller-helps policy makes it
656 >     * footprint to the setup of about a dozen fields, although with
657 >     * some System property parsing and with security processing that
658 >     * takes far longer than the actual construction when
659 >     * SecurityManagers are used or properties are set. The common
660 >     * pool is distinguished internally by having both a null
661 >     * workerNamePrefix and ISCOMMON config bit set, along with
662 >     * PRESET_SIZE set if parallelism was configured by system
663 >     * property.
664 >     *
665 >     * When external threads use ForkJoinTask.fork for the common
666 >     * pool, they can perform subtask processing (see helpComplete and
667 >     * related methods) upon joins.  This caller-helps policy makes it
668       * sensible to set common pool parallelism level to one (or more)
669       * less than the total number of available cores, or even zero for
670 <     * pure caller-runs.  We do not need to record whether external
671 <     * submissions are to the common pool -- if not, external help
672 <     * methods return quickly. These submitters would otherwise be
673 <     * blocked waiting for completion, so the extra effort (with
674 <     * liberally sprinkled task status checks) in inapplicable cases
675 <     * amounts to an odd form of limited spin-wait before blocking in
606 <     * ForkJoinTask.join.
670 >     * pure caller-runs. For the sake of ExecutorService specs, we can
671 >     * only do this for tasks entered via fork, not submit.  We track
672 >     * this using a task status bit (markPoolSubmission).  In all
673 >     * other cases, external threads waiting for joins first check the
674 >     * common pool for their task, which fails quickly if the caller
675 >     * did not fork to common pool.
676       *
677       * Guarantees for common pool parallelism zero are limited to
678       * tasks that are joined by their callers in a tree-structured
679       * fashion or use CountedCompleters (as is true for jdk
680       * parallelStreams). Support infiltrates several methods,
681 <     * including those that retry helping steps until we are sure that
682 <     * none apply if there are no workers.
681 >     * including those that retry helping steps or spin until we are
682 >     * sure that none apply if there are no workers.
683       *
684       * As a more appropriate default in managed environments, unless
685       * overridden by system properties, we use workers of subclass
# Line 632 | Line 701 | public class ForkJoinPool extends Abstra
701       * should be cancelled anyway. Interrupts are cleared only when
702       * necessary to ensure that calls to LockSupport.park do not loop
703       * indefinitely (park returns immediately if the current thread is
704 <     * interrupted). If so, interruption is reinstated after blocking
705 <     * if status could be visible during the scope of any task.  For
706 <     * cases in which task bodies are specified or desired to
638 <     * interrupt upon cancellation, ForkJoinTask.cancel can be
639 <     * overridden to do so (as is done for invoke{Any,All}).
704 >     * interrupted).  For cases in which task bodies are specified or
705 >     * desired to interrupt upon cancellation, ForkJoinTask.cancel can
706 >     * be overridden to do so (as is done for invoke{Any,All}).
707       *
708       * Memory placement
709       * ================
710       *
711 <     * Performance can be very sensitive to placement of instances of
712 <     * ForkJoinPool and WorkQueues and their queue arrays. To reduce
713 <     * false-sharing impact, the @Contended annotation isolates the
714 <     * ForkJoinPool.ctl field as well as the most heavily written
715 <     * WorkQueue fields. These mainly reduce cache traffic by scanners.
716 <     * WorkQueue arrays are presized large enough to avoid resizing
717 <     * (which transiently reduces throughput) in most tree-like
718 <     * computations, although not in some streaming usages. Initial
719 <     * sizes are not large enough to avoid secondary contention
720 <     * effects (especially for GC cardmarks) when queues are placed
721 <     * near each other in memory. This is common, but has different
722 <     * impact in different collectors and remains incompletely
723 <     * addressed.
711 >     * Performance is very sensitive to placement of instances of
712 >     * ForkJoinPool and WorkQueues and their queue arrays, as well as the
713 >     * placement of their fields. Cache misses and contention due to
714 >     * false-sharing have been observed to slow down some programs by
715 >     * more than a factor of four. There is no perfect solution, in
716 >     * part because isolating more fields also generates more cache
717 >     * misses in more common cases (because some fields and slots are
718 >     * usually read at the same time), and the main means of placing
719 >     * memory, the @Contended annotation provides only rough control
720 >     * (for good reason). We isolate the ForkJoinPool.ctl field as
721 >     * well as the set of WorkQueue fields that otherwise cause the most
722 >     * false-sharing misses with respect to other fields. Also,
723 >     * ForkJoinPool fields are ordered such that fields less prone to
724 >     * contention effects are first, offsetting those that otherwise
725 >     * would be, while also reducing total footprint vs using
726 >     * multiple @Contended regions, which tends to slow down
727 >     * less-contended applications.  These arrangements mainly reduce
728 >     * cache traffic by scanners, which speeds up finding tasks to
729 >     * run.  Initial sizing and resizing of WorkQueue arrays is an
730 >     * even more delicate tradeoff because the best strategy may vary
731 >     * across garbage collectors. Small arrays are better for locality
732 >     * and reduce GC scan time, but large arrays reduce both direct
733 >     * false-sharing and indirect cases due to GC bookkeeping
734 >     * (cardmarks etc), and reduce the number of resizes, which are
735 >     * not especially fast because they require atomic transfers, and
736 >     * may cause other scanning workers to stall or give up.
737 >     * Currently, arrays are initialized to be fairly small but early
738 >     * resizes rapidly increase size by more than a factor of two
739 >     * until very large.  (Maintenance note: any changes in fields,
740 >     * queues, or their uses must be accompanied by re-evaluation of
741 >     * these placement and sizing decisions.)
742       *
743       * Style notes
744       * ===========
745       *
746       * Memory ordering relies mainly on atomic operations (CAS,
747 <     * getAndSet, getAndAdd) along with explicit fences. These use
747 >     * getAndSet, getAndAdd) along with moded accesses. These use
748       * jdk-internal Unsafe for atomics and special memory modes,
749       * rather than VarHandles, to avoid initialization dependencies in
750       * other jdk components that require early parallelism.  This can
# Line 667 | Line 752 | public class ForkJoinPool extends Abstra
752       * outcomes across the unusual cases that arise in very racy code
753       * with very few invariants. All fields are read into locals
754       * before use, and null-checked if they are references, even if
755 <     * they can never be null under current usages.  Array accesses
756 <     * using masked indices include checks (that are always true) that
757 <     * the array length is non-zero to avoid compilers inserting more
758 <     * expensive traps.  This is usually done in a "C"-like style of
759 <     * listing declarations at the heads of methods or blocks, and
760 <     * using inline assignments on first encounter.  Nearly all
761 <     * explicit checks lead to bypass/return, not exception throws,
762 <     * because they may legitimately arise during shutdown.
755 >     * they can never be null under current usages. Usually,
756 >     * computations (held in local variables) are defined as soon as
757 >     * logically enabled, sometimes to convince compilers that they
758 >     * may be performed despite memory ordering constraints.  Array
759 >     * accesses using masked indices include checks (that are always
760 >     * true) that the array length is non-zero to avoid compilers
761 >     * inserting more expensive traps.  This is usually done in a
762 >     * "C"-like style of listing declarations at the heads of methods
763 >     * or blocks, and using inline assignments on first encounter.
764 >     * Nearly all explicit checks lead to bypass/return, not exception
765 >     * throws, because they may legitimately arise during shutdown. A
766 >     * few unusual loop constructions encourage (with varying
767 >     * effectiveness) JVMs about where (not) to place safepoints.
768       *
769       * There is a lot of representation-level coupling among classes
770       * ForkJoinPool, ForkJoinWorkerThread, and ForkJoinTask.  The
# Line 691 | Line 781 | public class ForkJoinPool extends Abstra
781       * perform reasonably even when interpreted (not compiled).
782       *
783       * The order of declarations in this file is (with a few exceptions):
784 <     * (1) Static utility functions
785 <     * (2) Nested (static) classes
786 <     * (3) Static fields
784 >     * (1) Static constants
785 >     * (2) Static utility functions
786 >     * (3) Nested (static) classes
787       * (4) Fields, along with constants used when unpacking some of them
788       * (5) Internal control methods
789       * (6) Callbacks and other support for ForkJoinTask methods
# Line 703 | Line 793 | public class ForkJoinPool extends Abstra
793       * Revision notes
794       * ==============
795       *
796 <     * The main sources of differences of January 2020 ForkJoin
707 <     * classes from previous version are:
796 >     * The main sources of differences from previous version are:
797       *
798 <     * * ForkJoinTask now uses field "aux" to support blocking joins
799 <     *   and/or record exceptions, replacing reliance on builtin
800 <     *   monitors and side tables.
801 <     * * Scans probe slots (vs compare indices), along with related
802 <     *   changes that reduce performance differences across most
803 <     *   garbage collectors, and reduce contention.
804 <     * * Refactoring for better integration of special task types and
805 <     *   other capabilities that had been incrementally tacked on. Plus
806 <     *   many minor reworkings to improve consistency.
798 >     * * Use of Unsafe vs VarHandle, including re-instatement of some
799 >     *   constructions from pre-VarHandle versions.
800 >     * * Reduced memory and signal contention, mainly by distinguishing
801 >     *   failure cases.
802 >     * * Improved initialization, in part by preparing for possible
803 >     *   removal of SecurityManager
804 >     * * Enable resizing (includes refactoring quiescence/termination)
805 >     * * Unification of most internal vs external operations; some made
806 >     *   possible via use of WorkQueue.access, and POOLSUBMIT status in tasks.
807       */
808  
809 +    // static configuration constants
810 +
811 +    /**
812 +     * Default idle timeout value (in milliseconds) for idle threads
813 +     * to park waiting for new work before terminating.
814 +     */
815 +    static final long DEFAULT_KEEPALIVE = 60_000L;
816 +
817 +    /**
818 +     * Undershoot tolerance for idle timeouts
819 +     */
820 +    static final long TIMEOUT_SLOP = 20L;
821 +
822 +    /**
823 +     * The default value for common pool maxSpares.  Overridable using
824 +     * the "java.util.concurrent.ForkJoinPool.common.maximumSpares"
825 +     * system property.  The default value is far in excess of normal
826 +     * requirements, but also far short of MAX_CAP and typical OS
827 +     * thread limits, so allows JVMs to catch misuse/abuse before
828 +     * running out of resources needed to do so.
829 +     */
830 +    static final int DEFAULT_COMMON_MAX_SPARES = 256;
831 +
832 +    /**
833 +     * Initial capacity of work-stealing queue array.  Must be a power
834 +     * of two, at least 2. See above.
835 +     */
836 +    static final int INITIAL_QUEUE_CAPACITY = 1 << 6;
837 +
838 +    // Bounds
839 +    static final int SWIDTH    = 16;               // width of short
840 +    static final int SMASK     = 0xffff;           // short bits == max index
841 +    static final int MAX_CAP   = 0x7fff;           // max #workers - 1
842 +
843 +    // pool.runState and workQueue.access bits and sentinels
844 +    static final int STOP         = 1 << 31;       // must be negative
845 +    static final int SHUTDOWN     = 1;
846 +    static final int TERMINATED   = 2;
847 +    static final int PARKED       = -1;            // access value when parked
848 +
849 +    // {pool, workQueue}.config bits
850 +    static final int FIFO         = 1 << 16;       // fifo queue or access mode
851 +    static final int SRC          = 1 << 17;       // set when stealable
852 +    static final int INNOCUOUS    = 1 << 18;       // set for Innocuous workers
853 +    static final int TRIMMED      = 1 << 19;       // timed out while idle
854 +    static final int ISCOMMON     = 1 << 20;       // set for common pool
855 +    static final int PRESET_SIZE  = 1 << 21;       // size was set by property
856 +
857 +    static final int UNCOMPENSATE = 1 << 16;       // tryCompensate return
858 +
859 +    /*
860 +     * Bits and masks for ctl and bounds are packed with 4 16 bit subfields:
861 +     * RC: Number of released (unqueued) workers
862 +     * TC: Number of total workers
863 +     * SS: version count and status of top waiting thread
864 +     * ID: poolIndex of top of Treiber stack of waiters
865 +     *
866 +     * When convenient, we can extract the lower 32 stack top bits
867 +     * (including version bits) as sp=(int)ctl. When sp is non-zero,
868 +     * there are waiting workers.  Count fields may be transiently
869 +     * negative during termination because of out-of-order updates.
870 +     * To deal with this, we use casts in and out of "short" and/or
871 +     * signed shifts to maintain signedness. Because it occupies
872 +     * uppermost bits, we can add one release count using getAndAdd of
873 +     * RC_UNIT, rather than CAS, when returning from a blocked join.
874 +     * Other updates of multiple subfields require CAS.
875 +     */
876 +
877 +    // Lower and upper word masks
878 +    static final long SP_MASK  = 0xffffffffL;
879 +    static final long UC_MASK  = ~SP_MASK;
880 +    // Release counts
881 +    static final int  RC_SHIFT = 48;
882 +    static final long RC_UNIT  = 0x0001L << RC_SHIFT;
883 +    static final long RC_MASK  = 0xffffL << RC_SHIFT;
884 +    // Total counts
885 +    static final int  TC_SHIFT = 32;
886 +    static final long TC_UNIT  = 0x0001L << TC_SHIFT;
887 +    static final long TC_MASK  = 0xffffL << TC_SHIFT;
888 +    // sp bits
889 +    static final int SS_SEQ    = 1 << 16;  // version count
890 +    static final int INACTIVE  = 1 << 31;  // phase bit when idle
891 +
892      // Static utilities
893  
894      /**
895       * If there is a security manager, makes sure caller has
896       * permission to modify threads.
897       */
726    private static void checkPermission() {
727        @SuppressWarnings("removal")
728        SecurityManager security = System.getSecurityManager();
729        if (security != null)
730            security.checkPermission(modifyThreadPermission);
731    }
732
898      @SuppressWarnings("removal")
899 <    static AccessControlContext contextWithPermissions(Permission ... perms) {
900 <        Permissions permissions = new Permissions();
901 <        for (Permission perm : perms)
902 <            permissions.add(perm);
903 <        return new AccessControlContext(
904 <            new ProtectionDomain[] { new ProtectionDomain(null, permissions) });
899 >    private static void checkPermission() {
900 >        SecurityManager security; RuntimePermission perm;
901 >        if ((security = System.getSecurityManager()) != null) {
902 >            if ((perm = modifyThreadPermission) == null)
903 >                modifyThreadPermission = perm = // races OK
904 >                    new RuntimePermission("modifyThread");
905 >            security.checkPermission(perm);
906 >        }
907      }
908  
909      // Nested classes
# Line 771 | Line 938 | public class ForkJoinPool extends Abstra
938       * new ForkJoinWorkerThread using the system class loader as the
939       * thread context class loader.
940       */
774    @SuppressWarnings("removal")
941      static final class DefaultForkJoinWorkerThreadFactory
942          implements ForkJoinWorkerThreadFactory {
777        // ACC for access to the factory
778        private static final AccessControlContext ACC = contextWithPermissions(
779            new RuntimePermission("getClassLoader"),
780            new RuntimePermission("setContextClassLoader"));
943          public final ForkJoinWorkerThread newThread(ForkJoinPool pool) {
944 +            boolean isCommon = (pool.workerNamePrefix == null);
945 +            @SuppressWarnings("removal")
946 +            SecurityManager sm = System.getSecurityManager();
947 +            if (sm == null)
948 +                return new ForkJoinWorkerThread(null, pool, true);
949 +            else if (isCommon)
950 +                return newCommonWithACC(pool);
951 +            else
952 +                return newRegularWithACC(pool);
953 +        }
954 +
955 +        /*
956 +         * Create and use static AccessControlContexts only if there
957 +         * is a SecurityManager. (These can be removed if/when
958 +         * SecurityManagers are removed from platform.) The ACCs are
959 +         * immutable and equivalent even when racily initialized, so
960 +         * they don't require locking, although with the chance of
961 +         * needlessly duplicate construction.
962 +         */
963 +        @SuppressWarnings("removal")
964 +        static volatile AccessControlContext regularACC, commonACC;
965 +
966 +        @SuppressWarnings("removal")
967 +        static ForkJoinWorkerThread newRegularWithACC(ForkJoinPool pool) {
968 +            AccessControlContext acc = regularACC;
969 +            if (acc == null) {
970 +                Permissions ps = new Permissions();
971 +                ps.add(new RuntimePermission("getClassLoader"));
972 +                ps.add(new RuntimePermission("setContextClassLoader"));
973 +                regularACC = acc =
974 +                    new AccessControlContext(new ProtectionDomain[] {
975 +                            new ProtectionDomain(null, ps) });
976 +            }
977              return AccessController.doPrivileged(
978                  new PrivilegedAction<>() {
979                      public ForkJoinWorkerThread run() {
980 <                        return new ForkJoinWorkerThread(null, pool, true, false);
981 <                    }},
787 <                ACC);
980 >                        return new ForkJoinWorkerThread(null, pool, true);
981 >                    }}, acc);
982          }
789    }
983  
791    /**
792     * Factory for CommonPool unless overridden by System property.
793     * Creates InnocuousForkJoinWorkerThreads if a security manager is
794     * present at time of invocation.  Support requires that we break
795     * quite a lot of encapsulation (some via helper methods in
796     * ThreadLocalRandom) to access and set Thread fields.
797     */
798    @SuppressWarnings("removal")
799    static final class DefaultCommonPoolForkJoinWorkerThreadFactory
800        implements ForkJoinWorkerThreadFactory {
984          @SuppressWarnings("removal")
985 <        private static final AccessControlContext ACC = contextWithPermissions(
986 <            modifyThreadPermission,
987 <            new RuntimePermission("enableContextClassLoaderOverride"),
988 <            new RuntimePermission("modifyThreadGroup"),
989 <            new RuntimePermission("getClassLoader"),
990 <            new RuntimePermission("setContextClassLoader"));
991 <
992 <        public final ForkJoinWorkerThread newThread(ForkJoinPool pool) {
993 <            @SuppressWarnings("removal")
994 <            ForkJoinWorkerThread t =
995 <                AccessController.doPrivileged(
996 <                 new PrivilegedAction<>() {
997 <                     public ForkJoinWorkerThread run() {
998 <                         return System.getSecurityManager() == null ?
999 <                             new ForkJoinWorkerThread(null, pool, true, true):
1000 <                             new ForkJoinWorkerThread.
1001 <                             InnocuousForkJoinWorkerThread(pool); }},
1002 <                 ACC);
1003 <            return t;
985 >        static ForkJoinWorkerThread newCommonWithACC(ForkJoinPool pool) {
986 >            AccessControlContext acc = commonACC;
987 >            if (acc == null) {
988 >                Permissions ps = new Permissions();
989 >                ps.add(new RuntimePermission("getClassLoader"));
990 >                ps.add(new RuntimePermission("setContextClassLoader"));
991 >                ps.add(new RuntimePermission("modifyThread"));
992 >                ps.add(new RuntimePermission("enableContextClassLoaderOverride"));
993 >                ps.add(new RuntimePermission("modifyThreadGroup"));
994 >                commonACC = acc =
995 >                    new AccessControlContext(new ProtectionDomain[] {
996 >                            new ProtectionDomain(null, ps) });
997 >            }
998 >            return AccessController.doPrivileged(
999 >                new PrivilegedAction<>() {
1000 >                    public ForkJoinWorkerThread run() {
1001 >                        return new ForkJoinWorkerThread.
1002 >                            InnocuousForkJoinWorkerThread(pool);
1003 >                    }}, acc);
1004          }
1005      }
1006  
824    // Constants shared across ForkJoinPool and WorkQueue
825
826    // Bounds
827    static final int SWIDTH       = 16;            // width of short
828    static final int SMASK        = 0xffff;        // short bits == max index
829    static final int MAX_CAP      = 0x7fff;        // max #workers - 1
830
831    // Masks and units for WorkQueue.phase and ctl sp subfield
832    static final int UNSIGNALLED  = 1 << 31;       // must be negative
833    static final int SS_SEQ       = 1 << 16;       // version count
834
835    // Mode bits and sentinels, some also used in WorkQueue fields
836    static final int FIFO         = 1 << 16;       // fifo queue or access mode
837    static final int SRC          = 1 << 17;       // set for valid queue ids
838    static final int INNOCUOUS    = 1 << 18;       // set for Innocuous workers
839    static final int QUIET        = 1 << 19;       // quiescing phase or source
840    static final int SHUTDOWN     = 1 << 24;
841    static final int TERMINATED   = 1 << 25;
842    static final int UNSTOPPABLE  = 1 << 26;       // true for common pool
843    static final int STOP         = 1 << 31;       // must be negative
844    static final int UNCOMPENSATE = 1 << 16;       // tryCompensate return
845
846    /**
847     * Initial capacity of work-stealing queue array.  Must be a power
848     * of two, at least 2. See above.
849     */
850    static final int INITIAL_QUEUE_CAPACITY = 1 << 8;
851
1007      /**
1008       * Queues supporting work-stealing as well as external task
1009       * submission. See above for descriptions and algorithms.
1010       */
1011      static final class WorkQueue {
857        volatile int phase;        // versioned, negative if inactive
1012          int stackPred;             // pool stack (ctl) predecessor link
1013          int config;                // index, mode, ORed with SRC after init
1014          int base;                  // index of next slot for poll
1015          ForkJoinTask<?>[] array;   // the queued tasks; power of 2 size
1016          final ForkJoinWorkerThread owner; // owning thread or null if shared
1017  
1018 <        // segregate fields frequently updated but not read by scans or steals
1018 >        // fields otherwise causing more unnecessary false-sharing cache misses
1019          @jdk.internal.vm.annotation.Contended("w")
1020          int top;                   // index of next slot for push
1021          @jdk.internal.vm.annotation.Contended("w")
1022 <        volatile int source;       // source queue id, lock, or sentinel
1022 >        volatile int access;       // values 0, 1 (locked), PARKED, STOP
1023 >        @jdk.internal.vm.annotation.Contended("w")
1024 >        volatile int phase;        // versioned, negative if inactive
1025 >        @jdk.internal.vm.annotation.Contended("w")
1026 >        volatile int source;       // source queue id in topLevelExec
1027          @jdk.internal.vm.annotation.Contended("w")
1028          int nsteals;               // number of steals from other queues
1029  
1030          // Support for atomic operations
1031          private static final Unsafe U;
1032 <        private static final long SOURCE;
1033 <        private static final long BASE;
1032 >        private static final long ACCESS;
1033 >        private static final long PHASE;
1034          private static final long ABASE;
1035 <        private static final int ASHIFT;
1036 <        @SuppressWarnings("removal")
1037 <        static final ForkJoinTask<?> getSlot(ForkJoinTask<?>[] a, int i) {
880 <            return (ForkJoinTask<?>)
881 <                U.getObjectAcquire(a, ((long)i << ASHIFT) + ABASE);
882 <        }
883 <        @SuppressWarnings("removal")
884 <        static final ForkJoinTask<?> getAndClearSlot(ForkJoinTask<?>[] a,
885 <                                                     int i) {
1035 >        private static final int  ASHIFT;
1036 >
1037 >        static ForkJoinTask<?> getAndClearSlot(ForkJoinTask<?>[] a, int i) {
1038              return (ForkJoinTask<?>)
1039 <                U.getAndSetObject(a, ((long)i << ASHIFT) + ABASE, null);
1039 >                U.getAndSetReference(a, ((long)i << ASHIFT) + ABASE, null);
1040          }
1041 <        @SuppressWarnings("removal")
1042 <        static final void setSlotVolatile(ForkJoinTask<?>[] a, int i,
1043 <                                          ForkJoinTask<?> v) {
1044 <            U.putObjectVolatile(a, ((long)i << ASHIFT) + ABASE, v);
1041 >        static boolean casSlotToNull(ForkJoinTask<?>[] a, int i,
1042 >                                     ForkJoinTask<?> c) {
1043 >            return U.compareAndSetReference(a, ((long)i << ASHIFT) + ABASE,
1044 >                                            c, null);
1045          }
1046 <        @SuppressWarnings("removal")
1047 <        static final boolean casSlotToNull(ForkJoinTask<?>[] a, int i,
896 <                                          ForkJoinTask<?> c) {
897 <            return U.compareAndSetObject(a, ((long)i << ASHIFT) + ABASE, c, null);
1046 >        final void forcePhaseActive() {    // clear sign bit
1047 >            U.getAndBitwiseAndInt(this, PHASE, 0x7fffffff);
1048          }
1049 <        final boolean tryLock() {
1050 <            return U.compareAndSetInt(this, SOURCE, 0, 1);
901 <        }
902 <        final void setBaseOpaque(int b) {
903 <            U.putIntOpaque(this, BASE, b);
1049 >        final int getAndSetAccess(int v) {
1050 >            return U.getAndSetInt(this, ACCESS, v);
1051          }
1052  
1053          /**
1054 <         * Constructor used by ForkJoinWorkerThreads. Most fields
1055 <         * are initialized upon thread start, in pool.registerWorker.
1054 >         * Constructor. For owned queues, most fields are initialized
1055 >         * upon thread start in pool.registerWorker.
1056           */
1057 <        WorkQueue(ForkJoinWorkerThread owner, boolean isInnocuous) {
911 <            this.config = (isInnocuous) ? INNOCUOUS : 0;
1057 >        WorkQueue(ForkJoinWorkerThread owner, int config) {
1058              this.owner = owner;
913        }
914
915        /**
916         * Constructor used for external queues.
917         */
918        WorkQueue(int config) {
919            array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
1059              this.config = config;
921            owner = null;
922            phase = -1;
1060          }
1061  
1062          /**
# Line 933 | Line 1070 | public class ForkJoinPool extends Abstra
1070           * Returns the approximate number of tasks in the queue.
1071           */
1072          final int queueSize() {
1073 <            VarHandle.acquireFence(); // ensure fresh reads by external callers
937 <            int n = top - base;
1073 >            int unused = access, n = top - base; // for ordering effect
1074              return (n < 0) ? 0 : n;   // ignore transient negative
1075          }
1076  
1077          /**
1078 <         * Provides a more conservative estimate of whether this queue
943 <         * has any tasks than does queueSize.
944 <         */
945 <        final boolean isEmpty() {
946 <            return !((source != 0 && owner == null) || top - base > 0);
947 <        }
948 <
949 <        /**
950 <         * Pushes a task. Call only by owner in unshared queues.
1078 >         * Pushes a task. Called only by owner or if already locked
1079           *
1080           * @param task the task. Caller must ensure non-null.
1081 <         * @param pool if nonnull, pool to signal if was empty
1081 >         * @param pool the pool. Must be non-null unless terminating.
1082 >         * @param signalIfEmpty true if signal when pushing to empty queue
1083           * @throws RejectedExecutionException if array cannot be resized
1084           */
1085 <        final void push(ForkJoinTask<?> task, ForkJoinPool pool) {
1086 <            ForkJoinTask<?>[] a = array;
1087 <            int s = top++, d = s - base, cap, m; // skip insert if disabled
1088 <            if (a != null && (cap = a.length) > 0) {
1089 <                setSlotVolatile(a, (m = cap - 1) & s, task);
1090 <                if (d == m)
1091 <                    growArray();
1092 <                if ((d == m || a[m & (s - 1)] == null) && pool != null)
1093 <                    pool.signalWork(); // signal if was empty or resized
1094 <            }
1095 <        }
1096 <
1097 <        /**
1098 <         * Pushes task to a shared queue with lock already held, and unlocks.
1099 <         *
1100 <         * @return true if caller should signal work
1101 <         */
1102 <        final boolean lockedPush(ForkJoinTask<?> task) {
1103 <            ForkJoinTask<?>[] a = array;
1104 <            int s = top++, d = s - base, cap, m;
1105 <            if (a != null && (cap = a.length) > 0) {
1106 <                a[(m = cap - 1) & s] = task;
1107 <                if (d == m)
1108 <                    growArray();
980 <                source = 0; // unlock
981 <                if (d == m || a[m & (s - 1)] == null)
982 <                    return true;
983 <            }
984 <            return false;
985 <        }
986 <
987 <        /**
988 <         * Doubles the capacity of array. Called by owner or with lock
989 <         * held after pre-incrementing top, which is reverted on
990 <         * allocation failure.
991 <         */
992 <        final void growArray() {
993 <            ForkJoinTask<?>[] oldArray = array, newArray;
994 <            int s = top - 1, oldCap, newCap;
995 <            if (oldArray != null && (oldCap = oldArray.length) > 0 &&
996 <                (newCap = oldCap << 1) > 0) { // skip if disabled
997 <                try {
998 <                    newArray = new ForkJoinTask<?>[newCap];
999 <                } catch (Throwable ex) {
1000 <                    top = s;
1001 <                    if (owner == null)
1002 <                        source = 0; // unlock
1003 <                    throw new RejectedExecutionException(
1004 <                        "Queue capacity exceeded");
1005 <                }
1006 <                int newMask = newCap - 1, oldMask = oldCap - 1;
1007 <                for (int k = oldCap; k > 0; --k, --s) {
1008 <                    ForkJoinTask<?> x;        // poll old, push to new
1009 <                    if ((x = getAndClearSlot(oldArray, s & oldMask)) == null)
1010 <                        break;                // others already taken
1011 <                    newArray[s & newMask] = x;
1085 >        final void push(ForkJoinTask<?> task, ForkJoinPool pool,
1086 >                        boolean signalIfEmpty) {
1087 >            boolean resize = false;
1088 >            int s = top++, b = base, cap, m; ForkJoinTask<?>[] a;
1089 >            if ((a = array) != null && (cap = a.length) > 0) {
1090 >                if ((m = (cap - 1)) == s - b) {
1091 >                    resize = true;            // rapidly grow until large
1092 >                    int newCap = (cap < 1 << 24) ? cap << 2 : cap << 1;
1093 >                    ForkJoinTask<?>[] newArray;
1094 >                    try {
1095 >                        newArray = new ForkJoinTask<?>[newCap];
1096 >                    } catch (Throwable ex) {
1097 >                        top = s;
1098 >                        access = 0;
1099 >                        throw new RejectedExecutionException(
1100 >                            "Queue capacity exceeded");
1101 >                    }
1102 >                    if (newCap > 0) {         // always true
1103 >                        int newMask = newCap - 1, k = s;
1104 >                        do {                  // poll old, push to new
1105 >                            newArray[k-- & newMask] = task;
1106 >                        } while ((task = getAndClearSlot(a, k & m)) != null);
1107 >                    }
1108 >                    array = newArray;
1109                  }
1110 <                VarHandle.releaseFence();     // fill before publish
1111 <                array = newArray;
1110 >                else
1111 >                    a[m & s] = task;
1112 >                getAndSetAccess(0);           // for memory effects if owned
1113 >                if ((resize || (a[m & (s - 1)] == null && signalIfEmpty)) &&
1114 >                    pool != null)
1115 >                    pool.signalWork();
1116              }
1117          }
1118  
1018        // Variants of pop
1019
1119          /**
1120 <         * Pops and returns task, or null if empty. Called only by owner.
1120 >         * Takes next task, if one exists, in order specified by mode,
1121 >         * so acts as either local-pop or local-poll. Called only by owner.
1122 >         * @param fifo nonzero if FIFO mode
1123           */
1124 <        private ForkJoinTask<?> pop() {
1124 >        final ForkJoinTask<?> nextLocalTask(int fifo) {
1125              ForkJoinTask<?> t = null;
1126 <            int s = top, cap; ForkJoinTask<?>[] a;
1127 <            if ((a = array) != null && (cap = a.length) > 0 && base != s-- &&
1128 <                (t = getAndClearSlot(a, (cap - 1) & s)) != null)
1129 <                top = s;
1126 >            ForkJoinTask<?>[] a = array;
1127 >            int p = top, s = p - 1, b = base, nb, cap;
1128 >            if (p - b > 0 && a != null && (cap = a.length) > 0) {
1129 >                do {
1130 >                    if (fifo == 0 || (nb = b + 1) == p) {
1131 >                        if ((t = getAndClearSlot(a, (cap - 1) & s)) != null) {
1132 >                            top = s;
1133 >                            U.storeFence();
1134 >                        }
1135 >                        break;                   // lost race for only task
1136 >                    }
1137 >                    else if ((t = getAndClearSlot(a, (cap - 1) & b)) != null) {
1138 >                        base = nb;
1139 >                        U.storeFence();
1140 >                        break;
1141 >                    }
1142 >                    else {
1143 >                        while (b == (b = base)) {
1144 >                            U.loadFence();
1145 >                            Thread.onSpinWait(); // spin to reduce memory traffic
1146 >                        }
1147 >                    }
1148 >                } while (p - b > 0);
1149 >            }
1150              return t;
1151          }
1152  
1153          /**
1154 <         * Pops the given task for owner only if it is at the current top.
1154 >         * Takes next task, if one exists, using configured mode.
1155 >         * (Always owned, never called for Common pool.)
1156           */
1157 <        final boolean tryUnpush(ForkJoinTask<?> task) {
1158 <            int s = top, cap; ForkJoinTask<?>[] a;
1037 <            if ((a = array) != null && (cap = a.length) > 0 && base != s-- &&
1038 <                casSlotToNull(a, (cap - 1) & s, task)) {
1039 <                top = s;
1040 <                return true;
1041 <            }
1042 <            return false;
1157 >        final ForkJoinTask<?> nextLocalTask() {
1158 >            return nextLocalTask(config & FIFO);
1159          }
1160  
1161          /**
1162 <         * Locking version of tryUnpush.
1162 >         * Pops the given task only if it is at the current top.
1163           */
1164 <        final boolean externalTryUnpush(ForkJoinTask<?> task) {
1164 >        final boolean tryUnpush(ForkJoinTask<?> task, boolean owned) {
1165              boolean taken = false;
1166 <            for (;;) {
1167 <                int s = top, cap, k; ForkJoinTask<?>[] a;
1168 <                if ((a = array) == null || (cap = a.length) <= 0 ||
1169 <                    a[k = (cap - 1) & (s - 1)] != task)
1170 <                    break;
1171 <                if (tryLock()) {
1172 <                    if (top == s && array == a) {
1173 <                        if (taken = casSlotToNull(a, k, task)) {
1174 <                            top = s - 1;
1175 <                            source = 0;
1060 <                            break;
1061 <                        }
1166 >            ForkJoinTask<?>[] a = array;
1167 >            int p = top, s, cap, k;
1168 >            if (task != null && base != p && a != null && (cap = a.length) > 0 &&
1169 >                a[k = (cap - 1) & (s = p - 1)] == task) {
1170 >                if (owned || getAndSetAccess(1) == 0) {
1171 >                    if ((owned || (top == p && a[k] == task)) &&
1172 >                        getAndClearSlot(a, k) != null) {
1173 >                        taken = true;
1174 >                        top = s;
1175 >                        U.storeFence();
1176                      }
1177 <                    source = 0; // release lock for retry
1177 >                    if (!owned)
1178 >                        access = 0;
1179                  }
1065                Thread.yield(); // trylock failure
1180              }
1181              return taken;
1182          }
1183  
1184          /**
1185 <         * Deep form of tryUnpush: Traverses from top and removes task if
1072 <         * present, shifting others to fill gap.
1185 >         * Returns next task, if one exists, in order specified by mode.
1186           */
1187 <        final boolean tryRemove(ForkJoinTask<?> task, boolean owned) {
1188 <            boolean taken = false;
1189 <            int p = top, cap; ForkJoinTask<?>[] a; ForkJoinTask<?> t;
1190 <            if ((a = array) != null && task != null && (cap = a.length) > 0) {
1191 <                int m = cap - 1, s = p - 1, d = p - base;
1192 <                for (int i = s, k; d > 0; --i, --d) {
1193 <                    if ((t = a[k = i & m]) == task) {
1194 <                        if (owned || tryLock()) {
1195 <                            if ((owned || (array == a && top == p)) &&
1196 <                                (taken = casSlotToNull(a, k, t))) {
1197 <                                for (int j = i; j != s; ) // shift down
1085 <                                    a[j & m] = getAndClearSlot(a, ++j & m);
1086 <                                top = s;
1087 <                            }
1088 <                            if (!owned)
1089 <                                source = 0;
1090 <                        }
1091 <                        break;
1187 >        final ForkJoinTask<?> peek() {
1188 >            ForkJoinTask<?>[] a = array;
1189 >            int cfg = config, p = top, b = base, cap;
1190 >            if (p != b && a != null && (cap = a.length) > 0) {
1191 >                if ((cfg & FIFO) == 0)
1192 >                    return a[(cap - 1) & (p - 1)];
1193 >                else { // skip over in-progress removals
1194 >                    ForkJoinTask<?> t;
1195 >                    for ( ; p - b > 0; ++b) {
1196 >                        if ((t = a[(cap - 1) & b]) != null)
1197 >                            return t;
1198                      }
1199                  }
1200              }
1201 <            return taken;
1201 >            return null;
1202          }
1203  
1098        // variants of poll
1099
1204          /**
1205 <         * Tries once to poll next task in FIFO order, failing on
1206 <         * inconsistency or contention.
1205 >         * Polls for a task. Used only by non-owners in usually
1206 >         * uncontended contexts.
1207 >         *
1208 >         * @param pool if nonnull, pool to signal if more tasks exist
1209           */
1210 <        final ForkJoinTask<?> tryPoll() {
1211 <            int cap, b, k; ForkJoinTask<?>[] a;
1212 <            if ((a = array) != null && (cap = a.length) > 0) {
1213 <                ForkJoinTask<?> t = getSlot(a, k = (cap - 1) & (b = base));
1214 <                if (base == b++ && t != null && casSlotToNull(a, k, t)) {
1215 <                    setBaseOpaque(b);
1210 >        final ForkJoinTask<?> poll(ForkJoinPool pool) {
1211 >            for (int b = base;;) {
1212 >                int cap; ForkJoinTask<?>[] a;
1213 >                if ((a = array) == null || (cap = a.length) <= 0)
1214 >                    break;                        // currently impossible
1215 >                int k = (cap - 1) & b, nb = b + 1, nk = (cap - 1) & nb;
1216 >                ForkJoinTask<?> t = a[k];
1217 >                U.loadFence();                    // for re-reads
1218 >                if (b != (b = base))              // inconsistent
1219 >                    ;
1220 >                else if (t != null && casSlotToNull(a, k, t)) {
1221 >                    base = nb;
1222 >                    U.storeFence();
1223 >                    if (pool != null && a[nk] != null)
1224 >                        pool.signalWork();        // propagate
1225                      return t;
1226                  }
1227 +                else if (array != a || a[k] != null)
1228 +                    ;                             // stale
1229 +                else if (a[nk] == null && top - b <= 0)
1230 +                    break;                        // empty
1231              }
1232              return null;
1233          }
1234  
1235          /**
1236 <         * Takes next task, if one exists, in order specified by mode.
1236 >         * Tries to poll next task in FIFO order, failing on
1237 >         * contention or stalls. Used only by topLevelExec to repoll
1238 >         * from the queue obtained from pool.scan.
1239           */
1240 <        final ForkJoinTask<?> nextLocalTask(int cfg) {
1241 <            ForkJoinTask<?> t = null;
1121 <            int s = top, cap; ForkJoinTask<?>[] a;
1240 >        final ForkJoinTask<?> tryPoll() {
1241 >            int b = base, cap; ForkJoinTask<?>[] a;
1242              if ((a = array) != null && (cap = a.length) > 0) {
1243 <                for (int b, d;;) {
1244 <                    if ((d = s - (b = base)) <= 0)
1245 <                        break;
1246 <                    if (d == 1 || (cfg & FIFO) == 0) {
1247 <                        if ((t = getAndClearSlot(a, --s & (cap - 1))) != null)
1248 <                            top = s;
1249 <                        break;
1250 <                    }
1251 <                    if ((t = getAndClearSlot(a, b++ & (cap - 1))) != null) {
1252 <                        setBaseOpaque(b);
1253 <                        break;
1243 >                for (;;) {
1244 >                    int k = (cap - 1) & b, nb = b + 1;
1245 >                    ForkJoinTask<?> t = a[k];
1246 >                    U.loadFence();                // for re-reads
1247 >                    if (b != (b = base))
1248 >                        ;                         // inconsistent
1249 >                    else if (t != null) {
1250 >                        if (casSlotToNull(a, k, t)) {
1251 >                            base = nb;
1252 >                            U.storeFence();
1253 >                            return t;
1254 >                        }
1255 >                        break;                   // contended
1256                      }
1257 +                    else if (a[k] == null)
1258 +                        break;                   // empty or stalled
1259                  }
1260              }
1261 <            return t;
1138 <        }
1139 <
1140 <        /**
1141 <         * Takes next task, if one exists, using configured mode.
1142 <         */
1143 <        final ForkJoinTask<?> nextLocalTask() {
1144 <            return nextLocalTask(config);
1145 <        }
1146 <
1147 <        /**
1148 <         * Returns next task, if one exists, in order specified by mode.
1149 <         */
1150 <        final ForkJoinTask<?> peek() {
1151 <            VarHandle.acquireFence();
1152 <            int cap; ForkJoinTask<?>[] a;
1153 <            return ((a = array) != null && (cap = a.length) > 0) ?
1154 <                a[(cap - 1) & ((config & FIFO) != 0 ? base : top - 1)] : null;
1261 >            return null;
1262          }
1263  
1264          // specialized execution methods
1265  
1266          /**
1267           * Runs the given (stolen) task if nonnull, as well as
1268 <         * remaining local tasks and/or others available from the
1269 <         * given queue.
1268 >         * remaining local tasks and/or others available from its
1269 >         * source queue, if any.
1270           */
1271 <        final void topLevelExec(ForkJoinTask<?> task, WorkQueue q) {
1272 <            int cfg = config, nstolen = 1;
1271 >        final void topLevelExec(ForkJoinTask<?> task, WorkQueue src) {
1272 >            int cfg = config, fifo = cfg & FIFO, nstolen = 1;
1273              while (task != null) {
1274                  task.doExec();
1275 <                if ((task = nextLocalTask(cfg)) == null &&
1276 <                    q != null && (task = q.tryPoll()) != null)
1275 >                if ((task = nextLocalTask(fifo)) == null &&
1276 >                    src != null && (task = src.tryPoll()) != null)
1277                      ++nstolen;
1278              }
1279              nsteals += nstolen;
# Line 1176 | Line 1283 | public class ForkJoinPool extends Abstra
1283          }
1284  
1285          /**
1286 +         * Deep form of tryUnpush: Traverses from top and removes and
1287 +         * runs task if present, shifting others to fill gap.
1288 +         * @return task status if removed, else 0
1289 +         */
1290 +        final int tryRemoveAndExec(ForkJoinTask<?> task, boolean owned) {
1291 +            boolean taken = false;
1292 +            ForkJoinTask<?>[] a = array;
1293 +            int p = top, s = p - 1, d = p - base, cap;
1294 +            if (task != null && d > 0 && a != null && (cap = a.length) > 0) {
1295 +                for (int m = cap - 1, i = s; ; --i) {
1296 +                    ForkJoinTask<?> t; int k;
1297 +                    if ((t = a[k = i & m]) == task) {
1298 +                        if (!owned && getAndSetAccess(1) != 0)
1299 +                            break;                 // fail if locked
1300 +                        if ((owned || (top == p && a[k] == task)) &&
1301 +                            getAndClearSlot(a, k) != null) {
1302 +                            taken = true;
1303 +                            if (i != s && i == base)
1304 +                                base = i + 1;      // avoid shift
1305 +                            else {
1306 +                                for (int j = i; j != s;) // shift down
1307 +                                    a[j & m] = getAndClearSlot(a, ++j & m);
1308 +                                top = s;
1309 +                            }
1310 +                            U.storeFence();
1311 +                        }
1312 +                        if (!owned)
1313 +                            access = 0;
1314 +                        break;
1315 +                    }
1316 +                    else if (t == null || --d == 0)
1317 +                        break;
1318 +                }
1319 +            }
1320 +            if (!taken)
1321 +                return 0;
1322 +            return task.doExec();
1323 +        }
1324 +
1325 +        /**
1326           * Tries to pop and run tasks within the target's computation
1327           * until done, not found, or limit exceeded.
1328           *
1329 <         * @param task root of CountedCompleter computation
1183 <         * @param owned true if owned by a ForkJoinWorkerThread
1329 >         * @param task root of computation
1330           * @param limit max runs, or zero for no limit
1331           * @return task status on exit
1332           */
1333          final int helpComplete(ForkJoinTask<?> task, boolean owned, int limit) {
1334 <            int status = 0, cap, k, p, s; ForkJoinTask<?>[] a; ForkJoinTask<?> t;
1335 <            while (task != null && (status = task.status) >= 0 &&
1336 <                   (a = array) != null && (cap = a.length) > 0 &&
1337 <                   (t = a[k = (cap - 1) & (s = (p = top) - 1)])
1338 <                   instanceof CountedCompleter) {
1339 <                CountedCompleter<?> f = (CountedCompleter<?>)t;
1340 <                boolean taken = false;
1341 <                for (;;) {     // exec if root task is a completer of t
1342 <                    if (f == task) {
1343 <                        if (owned) {
1344 <                            if ((taken = casSlotToNull(a, k, t)))
1199 <                                top = s;
1200 <                        }
1201 <                        else if (tryLock()) {
1202 <                            if (top == p && array == a &&
1203 <                                (taken = casSlotToNull(a, k, t)))
1204 <                                top = s;
1205 <                            source = 0;
1206 <                        }
1207 <                        if (taken)
1208 <                            t.doExec();
1209 <                        else if (!owned)
1210 <                            Thread.yield(); // tryLock failure
1334 >            int status = 0;
1335 >            if (task != null) {
1336 >                outer: for (;;) {
1337 >                    boolean taken = false;
1338 >                    ForkJoinTask<?>[] a; ForkJoinTask<?> t;
1339 >                    int p, s, cap, k;
1340 >                    if ((status = task.status) < 0)
1341 >                        return status;
1342 >                    if ((a = array) == null || (cap = a.length) <= 0 ||
1343 >                        (t = a[k = (cap - 1) & (s = (p = top) - 1)]) == null ||
1344 >                        !(t instanceof CountedCompleter))
1345                          break;
1346 +                    for (CountedCompleter<?> f = (CountedCompleter<?>)t;;) {
1347 +                        if (f == task)
1348 +                            break;
1349 +                        else if ((f = f.completer) == null)
1350 +                            break outer;       // ineligible
1351 +                    }
1352 +                    if (!owned && getAndSetAccess(1) != 0)
1353 +                        break;                 // fail if locked
1354 +                    if ((owned || (top == p && a[k] == t)) &&
1355 +                        getAndClearSlot(a, k) != null) {
1356 +                        taken = true;
1357 +                        top = s;
1358 +                        U.storeFence();
1359 +                    }
1360 +                    if (!owned)
1361 +                        access = 0;
1362 +                    if (taken) {
1363 +                        t.doExec();
1364 +                        if (limit != 0 && --limit == 0)
1365 +                            break;
1366                      }
1213                    else if ((f = f.completer) == null)
1214                        break;
1367                  }
1368 <                if (taken && limit != 0 && --limit == 0)
1217 <                    break;
1368 >                status = task.status;
1369              }
1370              return status;
1371          }
1372  
1373          /**
1374           * Tries to poll and run AsynchronousCompletionTasks until
1375 <         * none found or blocker is released.
1375 >         * none found or blocker is released
1376           *
1377           * @param blocker the blocker
1378           */
1379          final void helpAsyncBlocker(ManagedBlocker blocker) {
1380 <            int cap, b, d, k; ForkJoinTask<?>[] a; ForkJoinTask<?> t;
1381 <            while (blocker != null && (d = top - (b = base)) > 0 &&
1382 <                   (a = array) != null && (cap = a.length) > 0 &&
1383 <                   (((t = getSlot(a, k = (cap - 1) & b)) == null && d > 1) ||
1384 <                    t instanceof
1385 <                    CompletableFuture.AsynchronousCompletionTask) &&
1386 <                   !blocker.isReleasable()) {
1387 <                if (t != null && base == b++ && casSlotToNull(a, k, t)) {
1388 <                    setBaseOpaque(b);
1389 <                    t.doExec();
1380 >            if (blocker != null) {
1381 >                for (;;) {
1382 >                    int b = base, cap; ForkJoinTask<?>[] a;
1383 >                    if ((a = array) == null || (cap = a.length) <= 0 || b == top)
1384 >                        break;
1385 >                    int k = (cap - 1) & b, nb = b + 1, nk = (cap - 1) & nb;
1386 >                    ForkJoinTask<?> t = a[k];
1387 >                    U.loadFence();                     // for re-reads
1388 >                    if (base != b)
1389 >                        ;
1390 >                    else if (blocker.isReleasable())
1391 >                        break;
1392 >                    else if (a[k] != t)
1393 >                        ;
1394 >                    else if (t != null) {
1395 >                        if (!(t instanceof CompletableFuture
1396 >                              .AsynchronousCompletionTask))
1397 >                            break;
1398 >                        else if (casSlotToNull(a, k, t)) {
1399 >                            base = nb;
1400 >                            U.storeFence();
1401 >                            t.doExec();
1402 >                        }
1403 >                    }
1404 >                    else if (a[nk] == null)
1405 >                        break;
1406                  }
1407              }
1408          }
1409  
1410          // misc
1411  
1245        /** AccessControlContext for innocuous workers, created on 1st use. */
1246        @SuppressWarnings("removal")
1247        private static AccessControlContext INNOCUOUS_ACC;
1248
1249        /**
1250         * Initializes (upon registration) InnocuousForkJoinWorkerThreads.
1251         */
1252        @SuppressWarnings("removal")
1253        final void initializeInnocuousWorker() {
1254            AccessControlContext acc; // racy construction OK
1255            if ((acc = INNOCUOUS_ACC) == null)
1256                INNOCUOUS_ACC = acc = new AccessControlContext(
1257                    new ProtectionDomain[] { new ProtectionDomain(null, null) });
1258            Thread t = Thread.currentThread();
1259            ThreadLocalRandom.setInheritedAccessControlContext(t, acc);
1260            ThreadLocalRandom.eraseThreadLocals(t);
1261        }
1262
1412          /**
1413           * Returns true if owned by a worker thread and not known to be blocked.
1414           */
1415          final boolean isApparentlyUnblocked() {
1416              Thread wt; Thread.State s;
1417 <            return ((wt = owner) != null &&
1417 >            return (access != STOP && (wt = owner) != null &&
1418                      (s = wt.getState()) != Thread.State.BLOCKED &&
1419                      s != Thread.State.WAITING &&
1420                      s != Thread.State.TIMED_WAITING);
1421          }
1422  
1423 +        /**
1424 +         * Callback from InnocuousForkJoinWorkerThread.onStart
1425 +         */
1426 +        final void setInnocuous() {
1427 +            config |= INNOCUOUS;
1428 +        }
1429 +
1430          static {
1431              U = Unsafe.getUnsafe();
1432 <            int scale = U.arrayIndexScale(ForkJoinTask[].class);
1432 >            Class<WorkQueue> klass = WorkQueue.class;
1433 >            ACCESS = U.objectFieldOffset(klass, "access");
1434 >            PHASE = U.objectFieldOffset(klass, "phase");
1435 >            Class<ForkJoinTask[]> aklass = ForkJoinTask[].class;
1436 >            ABASE = U.arrayBaseOffset(aklass);
1437 >            int scale = U.arrayIndexScale(aklass);
1438 >            ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
1439              if ((scale & (scale - 1)) != 0)
1440                  throw new Error("array index scale not a power of two");
1279            ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
1280            ABASE = U.arrayBaseOffset(ForkJoinTask[].class);
1281            SOURCE = U.objectFieldOffset(WorkQueue.class, "source");
1282            BASE = U.objectFieldOffset(WorkQueue.class, "base");
1441          }
1442      }
1443  
# Line 1293 | Line 1451 | public class ForkJoinPool extends Abstra
1451          defaultForkJoinWorkerThreadFactory;
1452  
1453      /**
1454 <     * Permission required for callers of methods that may start or
1455 <     * kill threads.
1454 >     * Common (static) pool. Non-null for public use unless a static
1455 >     * construction exception, but internal usages null-check on use
1456 >     * to paranoically avoid potential initialization circularities
1457 >     * as well as to simplify generated code.
1458       */
1459 <    static final RuntimePermission modifyThreadPermission;
1459 >    static final ForkJoinPool common;
1460  
1461      /**
1462       * Sequence number for creating worker names
1463       */
1464      private static volatile int poolIds;
1465  
1306    // static configuration constants
1307
1308    /**
1309     * Default idle timeout value (in milliseconds) for the thread
1310     * triggering quiescence to park waiting for new work
1311     */
1312    private static final long DEFAULT_KEEPALIVE = 60_000L;
1313
1314    /**
1315     * Undershoot tolerance for idle timeouts
1316     */
1317    private static final long TIMEOUT_SLOP = 20L;
1318
1466      /**
1467 <     * The default value for common pool maxSpares.  Overridable using
1468 <     * the "java.util.concurrent.ForkJoinPool.common.maximumSpares"
1322 <     * system property.  The default value is far in excess of normal
1323 <     * requirements, but also far short of MAX_CAP and typical OS
1324 <     * thread limits, so allows JVMs to catch misuse/abuse before
1325 <     * running out of resources needed to do so.
1326 <     */
1327 <    private static final int DEFAULT_COMMON_MAX_SPARES = 256;
1328 <
1329 <    /*
1330 <     * Bits and masks for field ctl, packed with 4 16 bit subfields:
1331 <     * RC: Number of released (unqueued) workers minus target parallelism
1332 <     * TC: Number of total workers minus target parallelism
1333 <     * SS: version count and status of top waiting thread
1334 <     * ID: poolIndex of top of Treiber stack of waiters
1335 <     *
1336 <     * When convenient, we can extract the lower 32 stack top bits
1337 <     * (including version bits) as sp=(int)ctl.  The offsets of counts
1338 <     * by the target parallelism and the positionings of fields makes
1339 <     * it possible to perform the most common checks via sign tests of
1340 <     * fields: When ac is negative, there are not enough unqueued
1341 <     * workers, when tc is negative, there are not enough total
1342 <     * workers.  When sp is non-zero, there are waiting workers.  To
1343 <     * deal with possibly negative fields, we use casts in and out of
1344 <     * "short" and/or signed shifts to maintain signedness.
1345 <     *
1346 <     * Because it occupies uppermost bits, we can add one release
1347 <     * count using getAndAdd of RC_UNIT, rather than CAS, when
1348 <     * returning from a blocked join.  Other updates entail multiple
1349 <     * subfields and masking, requiring CAS.
1350 <     *
1351 <     * The limits packed in field "bounds" are also offset by the
1352 <     * parallelism level to make them comparable to the ctl rc and tc
1353 <     * fields.
1467 >     * Permission required for callers of methods that may start or
1468 >     * kill threads. Lazily constructed.
1469       */
1470 +    static volatile RuntimePermission modifyThreadPermission;
1471  
1356    // Lower and upper word masks
1357    private static final long SP_MASK    = 0xffffffffL;
1358    private static final long UC_MASK    = ~SP_MASK;
1359
1360    // Release counts
1361    private static final int  RC_SHIFT   = 48;
1362    private static final long RC_UNIT    = 0x0001L << RC_SHIFT;
1363    private static final long RC_MASK    = 0xffffL << RC_SHIFT;
1364
1365    // Total counts
1366    private static final int  TC_SHIFT   = 32;
1367    private static final long TC_UNIT    = 0x0001L << TC_SHIFT;
1368    private static final long TC_MASK    = 0xffffL << TC_SHIFT;
1369    private static final long ADD_WORKER = 0x0001L << (TC_SHIFT + 15); // sign
1472  
1473      // Instance fields
1372
1373    final long keepAlive;                // milliseconds before dropping if idle
1474      volatile long stealCount;            // collects worker nsteals
1475 <    int scanRover;                       // advances across pollScan calls
1476 <    volatile int threadIds;              // for worker thread names
1477 <    final int bounds;                    // min, max threads packed as shorts
1478 <    volatile int mode;                   // parallelism, runstate, queue mode
1475 >    volatile long threadIds;             // for worker thread names
1476 >    final long keepAlive;                // milliseconds before dropping if idle
1477 >    final long bounds;                   // min, max threads packed as shorts
1478 >    final int config;                    // static configuration bits
1479 >    volatile int runState;               // SHUTDOWN, STOP, TERMINATED bits
1480      WorkQueue[] queues;                  // main registry
1481      final ReentrantLock registrationLock;
1482      Condition termination;               // lazily constructed
# Line 1383 | Line 1484 | public class ForkJoinPool extends Abstra
1484      final ForkJoinWorkerThreadFactory factory;
1485      final UncaughtExceptionHandler ueh;  // per-worker UEH
1486      final Predicate<? super ForkJoinPool> saturate;
1487 +    //    final SharedThreadContainer container; // for loom
1488  
1489      @jdk.internal.vm.annotation.Contended("fjpctl") // segregate
1490      volatile long ctl;                   // main pool control
1491 +    @jdk.internal.vm.annotation.Contended("fjpctl") // colocate
1492 +    int parallelism;                     // target number of workers
1493  
1494      // Support for atomic operations
1495      private static final Unsafe U;
1496      private static final long CTL;
1497 <    private static final long MODE;
1497 >    private static final long RUNSTATE;
1498 >    private static final long PARALLELISM;
1499      private static final long THREADIDS;
1500      private static final long POOLIDS;
1501 +
1502      private boolean compareAndSetCtl(long c, long v) {
1503          return U.compareAndSetLong(this, CTL, c, v);
1504      }
# Line 1402 | Line 1508 | public class ForkJoinPool extends Abstra
1508      private long getAndAddCtl(long v) {
1509          return U.getAndAddLong(this, CTL, v);
1510      }
1511 <    private int getAndBitwiseOrMode(int v) {
1512 <        return U.getAndBitwiseOrInt(this, MODE, v);
1511 >    private int getAndBitwiseOrRunState(int v) {
1512 >        return U.getAndBitwiseOrInt(this, RUNSTATE, v);
1513      }
1514 <    private int getAndAddThreadIds(int x) {
1515 <        return U.getAndAddInt(this, THREADIDS, x);
1514 >    private long incrementThreadIds() {
1515 >        return U.getAndAddLong(this, THREADIDS, 1L);
1516      }
1517      private static int getAndAddPoolIds(int x) {
1518          return U.getAndAddInt(ForkJoinPool.class, POOLIDS, x);
1519      }
1520 +    private int getAndSetParallelism(int v) {
1521 +        return U.getAndSetInt(this, PARALLELISM, v);
1522 +    }
1523 +    private int getParallelismOpaque() {
1524 +        return U.getIntOpaque(this, PARALLELISM);
1525 +    }
1526  
1527 <    // Creating, registering and deregistering workers
1527 >    // Creating, registering, and deregistering workers
1528  
1529      /**
1530       * Tries to construct and start one worker. Assumes that total
# Line 1426 | Line 1538 | public class ForkJoinPool extends Abstra
1538          Throwable ex = null;
1539          ForkJoinWorkerThread wt = null;
1540          try {
1541 <            if (fac != null && (wt = fac.newThread(this)) != null) {
1541 >            if (runState >= 0 &&  // avoid construction if terminating
1542 >                fac != null && (wt = fac.newThread(this)) != null) {
1543                  wt.start();
1544 +                //                container.start(wt); // for loom
1545                  return true;
1546              }
1547          } catch (Throwable rex) {
# Line 1442 | Line 1556 | public class ForkJoinPool extends Abstra
1556       */
1557      final String nextWorkerThreadName() {
1558          String prefix = workerNamePrefix;
1559 <        int tid = getAndAddThreadIds(1) + 1;
1559 >        long tid = incrementThreadIds() + 1L;
1560          if (prefix == null) // commonPool has no prefix
1561              prefix = "ForkJoinPool.commonPool-worker-";
1562 <        return prefix.concat(Integer.toString(tid));
1562 >        return prefix.concat(Long.toString(tid));
1563      }
1564  
1565      /**
# Line 1454 | Line 1568 | public class ForkJoinPool extends Abstra
1568       * @param w caller's WorkQueue
1569       */
1570      final void registerWorker(WorkQueue w) {
1457        ReentrantLock lock = registrationLock;
1571          ThreadLocalRandom.localInit();
1572          int seed = ThreadLocalRandom.getProbe();
1573 +        ReentrantLock lock = registrationLock;
1574 +        int cfg = config & FIFO;
1575          if (w != null && lock != null) {
1461            int modebits = (mode & FIFO) | w.config;
1576              w.array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
1577 <            w.stackPred = seed;                         // stash for runWorker
1578 <            if ((modebits & INNOCUOUS) != 0)
1465 <                w.initializeInnocuousWorker();
1577 >            cfg |= w.config | SRC;
1578 >            w.stackPred = seed;
1579              int id = (seed << 1) | 1;                   // initial index guess
1580              lock.lock();
1581              try {
# Line 1472 | Line 1585 | public class ForkJoinPool extends Abstra
1585                      for (; qs[id &= m] != null && k > 0; id -= 2, k -= 2);
1586                      if (k == 0)
1587                          id = n | 1;                     // resize below
1588 <                    w.phase = w.config = id | modebits; // now publishable
1588 >                    w.phase = w.config = id | cfg;      // now publishable
1589  
1590                      if (id < n)
1591                          qs[id] = w;
# Line 1487 | Line 1600 | public class ForkJoinPool extends Abstra
1600                              if ((q = qs[j]) != null)    // shared queues may move
1601                                  as[q.config & am] = q;
1602                          }
1603 <                        VarHandle.releaseFence();       // fill before publish
1603 >                        U.storeFence();                 // fill before publish
1604                          queues = as;
1605                      }
1606                  }
# Line 1507 | Line 1620 | public class ForkJoinPool extends Abstra
1620       * @param ex the exception causing failure, or null if none
1621       */
1622      final void deregisterWorker(ForkJoinWorkerThread wt, Throwable ex) {
1623 <        ReentrantLock lock = registrationLock;
1624 <        WorkQueue w = null;
1625 <        int cfg = 0;
1626 <        if (wt != null && (w = wt.workQueue) != null && lock != null) {
1627 <            WorkQueue[] qs; int n, i;
1628 <            cfg = w.config;
1623 >        WorkQueue w = (wt == null) ? null : wt.workQueue;
1624 >        int cfg = (w == null) ? 0 : w.config;
1625 >        long c = ctl;
1626 >        if ((cfg & TRIMMED) == 0)             // decrement counts
1627 >            do {} while (c != (c = compareAndExchangeCtl(
1628 >                                   c, ((RC_MASK & (c - RC_UNIT)) |
1629 >                                       (TC_MASK & (c - TC_UNIT)) |
1630 >                                       (SP_MASK & c)))));
1631 >        else if ((int)c == 0)                 // was dropped on timeout
1632 >            cfg &= ~SRC;                      // suppress signal if last
1633 >        if (!tryTerminate(false, false) && w != null) {
1634 >            ReentrantLock lock; WorkQueue[] qs; int n, i;
1635              long ns = w.nsteals & 0xffffffffL;
1636 <            lock.lock();                             // remove index from array
1637 <            if ((qs = queues) != null && (n = qs.length) > 0 &&
1638 <                qs[i = cfg & (n - 1)] == w)
1639 <                qs[i] = null;
1640 <            stealCount += ns;                        // accumulate steals
1641 <            lock.unlock();
1642 <            long c = ctl;
1643 <            if ((cfg & QUIET) == 0) // unless self-signalled, decrement counts
1644 <                do {} while (c != (c = compareAndExchangeCtl(
1645 <                                       c, ((RC_MASK & (c - RC_UNIT)) |
1527 <                                           (TC_MASK & (c - TC_UNIT)) |
1528 <                                           (SP_MASK & c)))));
1529 <            else if ((int)c == 0)                    // was dropped on timeout
1530 <                cfg = 0;                             // suppress signal if last
1531 <            for (ForkJoinTask<?> t; (t = w.pop()) != null; )
1532 <                ForkJoinTask.cancelIgnoringExceptions(t); // cancel tasks
1636 >            if ((lock = registrationLock) != null) {
1637 >                lock.lock();                  // remove index unless terminating
1638 >                if ((qs = queues) != null && (n = qs.length) > 0 &&
1639 >                    qs[i = cfg & (n - 1)] == w)
1640 >                    qs[i] = null;
1641 >                stealCount += ns;             // accumulate steals
1642 >                lock.unlock();
1643 >            }
1644 >            if ((cfg & SRC) != 0)
1645 >                signalWork();                 // possibly replace worker
1646          }
1647 <
1648 <        if (!tryTerminate(false, false) && w != null && (cfg & SRC) != 0)
1649 <            signalWork();                            // possibly replace worker
1650 <        if (ex != null)
1647 >        if (ex != null) {
1648 >            if (w != null) {
1649 >                w.access = STOP;              // cancel tasks
1650 >                for (ForkJoinTask<?> t; (t = w.nextLocalTask(0)) != null; )
1651 >                    ForkJoinTask.cancelIgnoringExceptions(t);
1652 >            }
1653              ForkJoinTask.rethrow(ex);
1654 +        }
1655      }
1656  
1657      /*
1658 <     * Tries to create or release a worker if too few are running.
1658 >     * Releases an idle worker, or creates one if not enough exist.
1659       */
1660      final void signalWork() {
1661 <        for (long c = ctl; c < 0L;) {
1662 <            int sp, i; WorkQueue[] qs; WorkQueue v;
1663 <            if ((sp = (int)c & ~UNSIGNALLED) == 0) {  // no idle workers
1664 <                if ((c & ADD_WORKER) == 0L)           // enough total workers
1661 >        int pc = parallelism, n;
1662 >        long c = ctl;
1663 >        WorkQueue[] qs = queues;
1664 >        if ((short)(c >>> RC_SHIFT) < pc && qs != null && (n = qs.length) > 0) {
1665 >            for (;;) {
1666 >                boolean create = false;
1667 >                int sp = (int)c & ~INACTIVE;
1668 >                WorkQueue v = qs[sp & (n - 1)];
1669 >                int deficit = pc - (short)(c >>> TC_SHIFT);
1670 >                long ac = (c + RC_UNIT) & RC_MASK, nc;
1671 >                if (sp != 0 && v != null)
1672 >                    nc = (v.stackPred & SP_MASK) | (c & TC_MASK);
1673 >                else if (deficit <= 0)
1674                      break;
1675 <                if (c == (c = compareAndExchangeCtl(
1676 <                              c, ((RC_MASK & (c + RC_UNIT)) |
1677 <                                  (TC_MASK & (c + TC_UNIT)))))) {
1678 <                    createWorker();
1675 >                else {
1676 >                    create = true;
1677 >                    nc = ((c + TC_UNIT) & TC_MASK);
1678 >                }
1679 >                if (c == (c = compareAndExchangeCtl(c, nc | ac))) {
1680 >                    if (create)
1681 >                        createWorker();
1682 >                    else {
1683 >                        Thread owner = v.owner;
1684 >                        v.phase = sp;
1685 >                        if (v.access == PARKED)
1686 >                            LockSupport.unpark(owner);
1687 >                    }
1688                      break;
1689                  }
1690              }
1691 <            else if ((qs = queues) == null)
1692 <                break;                                // unstarted/terminated
1693 <            else if (qs.length <= (i = sp & SMASK))
1694 <                break;                                // terminated
1695 <            else if ((v = qs[i]) == null)
1696 <                break;                                // terminating
1697 <            else {
1698 <                long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + RC_UNIT));
1699 <                Thread vt = v.owner;
1700 <                if (c == (c = compareAndExchangeCtl(c, nc))) {
1701 <                    v.phase = sp;
1702 <                    LockSupport.unpark(vt);           // release idle worker
1691 >        }
1692 >    }
1693 >
1694 >    /**
1695 >     * Reactivates any idle worker, if one exists.
1696 >     *
1697 >     * @return the signalled worker, or null if none
1698 >     */
1699 >    private WorkQueue reactivate() {
1700 >        WorkQueue[] qs; int n;
1701 >        long c = ctl;
1702 >        if ((qs = queues) != null && (n = qs.length) > 0) {
1703 >            for (;;) {
1704 >                int sp = (int)c & ~INACTIVE;
1705 >                WorkQueue v = qs[sp & (n - 1)];
1706 >                long ac = UC_MASK & (c + RC_UNIT);
1707 >                if (sp == 0 || v == null)
1708                      break;
1709 +                if (c == (c = compareAndExchangeCtl(
1710 +                              c, (v.stackPred & SP_MASK) | ac))) {
1711 +                    Thread owner = v.owner;
1712 +                    v.phase = sp;
1713 +                    if (v.access == PARKED)
1714 +                        LockSupport.unpark(owner);
1715 +                    return v;
1716                  }
1717              }
1718          }
1719 +        return null;
1720 +    }
1721 +
1722 +    /**
1723 +     * Tries to deactivate worker w; called only on idle timeout.
1724 +     */
1725 +    private boolean tryTrim(WorkQueue w) {
1726 +        if (w != null) {
1727 +            int pred = w.stackPred, cfg = w.config | TRIMMED;
1728 +            long c = ctl;
1729 +            int sp = (int)c & ~INACTIVE;
1730 +            if ((sp & SMASK) == (cfg & SMASK) &&
1731 +                compareAndSetCtl(c, ((pred & SP_MASK) |
1732 +                                     (UC_MASK & (c - TC_UNIT))))) {
1733 +                w.config = cfg;  // add sentinel for deregisterWorker
1734 +                w.phase = sp;
1735 +                return true;
1736 +            }
1737 +        }
1738 +        return false;
1739 +    }
1740 +
1741 +    /**
1742 +     * Returns true if any submission queue is detectably nonempty.
1743 +     * Accurate only when workers are quiescent; else conservatively
1744 +     * approximate.
1745 +     */
1746 +    private boolean hasSubmissions() {
1747 +        WorkQueue[] qs; WorkQueue q;
1748 +        int n = ((qs = queues) == null) ? 0 : qs.length;
1749 +        for (int i = 0; i < n; i += 2) {
1750 +            if ((q = qs[i]) != null && (q.access > 0 || q.top - q.base > 0))
1751 +                return true;
1752 +        }
1753 +        return false;
1754      }
1755  
1756      /**
# Line 1579 | Line 1760 | public class ForkJoinPool extends Abstra
1760       * @param w caller's WorkQueue (may be null on failed initialization)
1761       */
1762      final void runWorker(WorkQueue w) {
1763 <        if (mode >= 0 && w != null) {           // skip on failed init
1583 <            w.config |= SRC;                    // mark as valid source
1763 >        if (w != null) {                        // skip on failed init
1764              int r = w.stackPred, src = 0;       // use seed from registerWorker
1765              do {
1766                  r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // xorshift
1767              } while ((src = scan(w, src, r)) >= 0 ||
1768                       (src = awaitWork(w)) == 0);
1769 +            w.access = STOP;                    // record normal termination
1770          }
1771      }
1772  
1773      /**
1774       * Scans for and if found executes top-level tasks: Tries to poll
1775       * each queue starting at a random index with random stride,
1776 <     * returning source id or retry indicator if contended or
1596 <     * inconsistent.
1776 >     * returning source id or retry indicator.
1777       *
1778       * @param w caller's WorkQueue
1779       * @param prevSrc the previous queue stolen from in current phase, or 0
# Line 1604 | Line 1784 | public class ForkJoinPool extends Abstra
1784          WorkQueue[] qs = queues;
1785          int n = (w == null || qs == null) ? 0 : qs.length;
1786          for (int step = (r >>> 16) | 1, i = n; i > 0; --i, r += step) {
1787 <            int j, cap, b; WorkQueue q; ForkJoinTask<?>[] a;
1788 <            if ((q = qs[j = r & (n - 1)]) != null && // poll at qs[j].array[k]
1787 >            int j, cap; WorkQueue q; ForkJoinTask<?>[] a;
1788 >            if ((q = qs[j = r & (n - 1)]) != null &&
1789                  (a = q.array) != null && (cap = a.length) > 0) {
1790 <                int k = (cap - 1) & (b = q.base), nextBase = b + 1;
1791 <                int nextIndex = (cap - 1) & nextBase, src = j | SRC;
1792 <                ForkJoinTask<?> t = WorkQueue.getSlot(a, k);
1790 >                int src = j | SRC, b = q.base;
1791 >                int k = (cap - 1) & b, nb = b + 1, nk = (cap - 1) & nb;
1792 >                ForkJoinTask<?> t = a[k];
1793 >                U.loadFence();                  // for re-reads
1794                  if (q.base != b)                // inconsistent
1795                      return prevSrc;
1796                  else if (t != null && WorkQueue.casSlotToNull(a, k, t)) {
1797 <                    q.base = nextBase;
1798 <                    ForkJoinTask<?> next = a[nextIndex];
1799 <                    if ((w.source = src) != prevSrc && next != null)
1797 >                    q.base = nb;
1798 >                    w.source = src;
1799 >                    if (prevSrc == 0 && q.base == nb && a[nk] != null)
1800                          signalWork();           // propagate
1801                      w.topLevelExec(t, q);
1802                      return src;
1803                  }
1804 <                else if (a[nextIndex] != null)  // revisit
1805 <                    return prevSrc;
1804 >                else if (q.array != a || a[k] != null || a[nk] != null)
1805 >                    return prevSrc;             // revisit
1806              }
1807          }
1808 <        return (queues != qs) ? prevSrc: -1;    // possibly resized
1808 >        return -1;
1809      }
1810  
1811      /**
1812 <     * Advances worker phase, pushes onto ctl stack, and awaits signal
1632 <     * or reports termination.
1812 >     * Advances phase, enqueues, and awaits signal or termination.
1813       *
1814       * @return negative if terminated, else 0
1815       */
1816      private int awaitWork(WorkQueue w) {
1817 <        int phase = (w.phase + SS_SEQ) & ~UNSIGNALLED;
1818 <        w.phase = phase | UNSIGNALLED;       // advance phase
1819 <        long prevCtl = ctl, c;               // enqueue
1820 <        do {
1821 <            w.stackPred = (int)prevCtl;
1822 <            c = ((prevCtl - RC_UNIT) & UC_MASK) | (phase & SP_MASK);
1823 <        } while (prevCtl != (prevCtl = compareAndExchangeCtl(prevCtl, c)));
1824 <        long initialCtl = c;
1825 <        LockSupport.setCurrentBlocker(this); // prepare to block (exit also OK)
1826 <        long deadline = 0L;                  // nonzero for timed wait
1827 <        boolean checkInterrupt = false;      // alternate with park calls
1828 <        boolean terminate = false;
1829 <        for (int md;;) {
1830 <            if (terminate = ((md = mode) < 0))
1831 <                break;
1832 <            else if (w.phase >= 0)
1833 <                break;
1834 <            else if (checkInterrupt = !checkInterrupt)
1835 <                Thread.interrupted();
1836 <            else if ((c = ctl) == prevCtl)
1837 <                Thread.onSpinWait();         // signal in progress
1838 <            else if ((md & SMASK) + (int)(c >> RC_SHIFT) > 0)
1839 <                LockSupport.park();          // not the only idle worker
1840 <            else if (!canStop()) {           // check racing submissions
1841 <                if (ctl == initialCtl &&
1842 <                    compareAndSetCtl(initialCtl, prevCtl))
1843 <                    w.phase = phase;         // self-signal
1844 <            }
1845 <            else if (terminate = tryTerminate(false, false))
1846 <                break;
1847 <            else {                           // timed wait or drop
1848 <                long now = System.currentTimeMillis();
1849 <                if (deadline == 0L && (deadline = now + keepAlive) == 0L)
1850 <                    deadline = 1L;           // avoid 0
1851 <                if (ctl != c)
1852 <                    ;                        // stale
1853 <                else if (deadline - now > TIMEOUT_SLOP)
1674 <                    LockSupport.parkUntil(deadline);
1675 <                else if (((int)c & SMASK) == (w.config & SMASK) &&
1676 <                         compareAndSetCtl(c, ((UC_MASK & (c - TC_UNIT)) |
1677 <                                              (prevCtl & SP_MASK)))) {
1678 <                    w.config |= QUIET;       // sentinel for deregisterWorker
1679 <                    terminate = true;        // drop on timeout
1817 >        if (w == null)
1818 >            return -1;                           // currently impossible
1819 >        int p = (w.phase + SS_SEQ) & ~INACTIVE;  // advance phase
1820 >        boolean idle = false;                    // true if possibly quiescent
1821 >        if (runState < 0)
1822 >            return -1;                           // terminating
1823 >        long sp = p & SP_MASK, pc = ctl, qc;
1824 >        w.phase = p | INACTIVE;
1825 >        do {                                     // enqueue
1826 >            w.stackPred = (int)pc;               // set ctl stack link
1827 >        } while (pc != (pc = compareAndExchangeCtl(
1828 >                            pc, qc = ((pc - RC_UNIT) & UC_MASK) | sp)));
1829 >        if ((qc & RC_MASK) <= 0L)
1830 >            idle = true;
1831 >        WorkQueue[] qs = queues; // to spin for expected #accesses in scan+signal
1832 >        int spins = ((qs == null) ? 0 : ((qs.length & SMASK) << 1)) + 4, rs;
1833 >        if (idle && hasSubmissions() && w.phase < 0)
1834 >            reactivate();                        // check for stragglers
1835 >        if ((rs = runState) < 0 ||
1836 >            (rs != 0 && idle && tryTerminate(false, false)))
1837 >            return -1;                           // quiescent termination
1838 >        while ((p = w.phase) < 0 && --spins > 0)
1839 >            Thread.onSpinWait();                 // spin before block
1840 >        if (p < 0) {                             // await signal
1841 >            long deadline = (idle) ? keepAlive + System.currentTimeMillis() : 0L;
1842 >            LockSupport.setCurrentBlocker(this);
1843 >            for (;;) {
1844 >                w.access = PARKED;               // enable unpark
1845 >                if (w.phase < 0) {
1846 >                    if (idle)
1847 >                        LockSupport.parkUntil(deadline);
1848 >                    else
1849 >                        LockSupport.park();
1850 >                }
1851 >                w.access = 0;                    // disable unpark
1852 >                if (w.phase >= 0) {
1853 >                    LockSupport.setCurrentBlocker(null);
1854                      break;
1855                  }
1856 <                else
1857 <                    deadline = 0L;           // not at head; restart timeout
1856 >                Thread.interrupted();            // clear status for next park
1857 >                if (idle) {                      // check for idle timeout
1858 >                    if (deadline - System.currentTimeMillis() < TIMEOUT_SLOP) {
1859 >                        if (tryTrim(w))
1860 >                            return -1;
1861 >                        else                     // not at head; restart timer
1862 >                            deadline += keepAlive;
1863 >                    }
1864 >                }
1865              }
1866          }
1867 <        LockSupport.setCurrentBlocker(null);
1687 <        return terminate ? -1 : 0;
1867 >        return (runState < 0) ? -1 : 0;
1868      }
1869  
1870      /**
1871 <     * Returns true if can start terminating if enabled, or already terminated
1871 >     * Non-overridable version of isQuiescent. Returns true if
1872 >     * quiescent or already terminating.
1873       */
1874      private boolean canStop() {
1875 <        outer: for (long c = ctl, oldSum = 0L;;) { // repeat until stable
1876 <            int md; WorkQueue[] qs;
1877 <            if ((qs = queues) == null || ((md = mode) & STOP) != 0)
1878 <                return true;
1879 <            if ((md & SMASK) + (int)(c >> RC_SHIFT) > 0)
1880 <                break;                             // active workers
1881 <            long checkSum = 0L;
1882 <            boolean rescan = false;
1883 <            for (int i = 0; i < qs.length; i += 2) { // scan racing submissions
1884 <                WorkQueue q; ForkJoinTask<?>[] a; int s = 0, cap;
1885 <                if ((q = qs[i]) != null) {
1886 <                    if ((a = q.array) != null && (cap = a.length) > 0 &&
1887 <                        ((s = q.top) != q.base || a[(cap - 1) & s] != null))
1888 <                        break outer;
1889 <                    else if (q.source != 0)
1890 <                        rescan = true;              // retry if locked
1891 <                }
1892 <                checkSum += (((long)i) << 32) ^ s;
1875 >        long c = ctl;
1876 >        do {
1877 >            if (runState < 0)
1878 >                break;
1879 >            if ((c & RC_MASK) > 0L || hasSubmissions())
1880 >                return false;
1881 >        } while (c != (c = ctl));  // validate
1882 >        return true;
1883 >    }
1884 >
1885 >    /**
1886 >     * Scans for and returns a polled task, if available.  Used only
1887 >     * for untracked polls. Begins scan at a random index to avoid
1888 >     * systematic unfairness.
1889 >     *
1890 >     * @param submissionsOnly if true, only scan submission queues
1891 >     */
1892 >    private ForkJoinTask<?> pollScan(boolean submissionsOnly) {
1893 >        int r = ThreadLocalRandom.nextSecondarySeed();
1894 >        if (submissionsOnly)                    // even indices only
1895 >            r &= ~1;
1896 >        int step = (submissionsOnly) ? 2 : 1;
1897 >        WorkQueue[] qs; int n; WorkQueue q; ForkJoinTask<?> t;
1898 >        if (runState >= 0 && (qs = queues) != null && (n = qs.length) > 0) {
1899 >            for (int i = n; i > 0; i -= step, r += step) {
1900 >                if ((q = qs[r & (n - 1)]) != null &&
1901 >                    (t = q.poll(this)) != null)
1902 >                    return t;
1903              }
1713            if (c == (c = ctl) && checkSum == oldSum && !rescan && queues == qs)
1714                return true;
1715            oldSum = checkSum;
1904          }
1905 <        return (mode & STOP) != 0; // recheck mode on false return
1905 >        return null;
1906      }
1907  
1908      /**
# Line 1726 | Line 1914 | public class ForkJoinPool extends Abstra
1914       * unblocked.
1915       *
1916       * @param c incoming ctl value
1917 +     * @param canSaturate true to override the saturate predicate
1918       * @return UNCOMPENSATE: block then adjust, 0: block, -1 : retry
1919       */
1920 <    private int tryCompensate(long c) {
1920 >    private int tryCompensate(long c, boolean canSaturate) {
1921          Predicate<? super ForkJoinPool> sat;
1922 <        int md = mode, b = bounds;
1923 <        // counts are signed; centered at parallelism level == 0
1922 >        long b = bounds;                               // unpack fields
1923 >        int pc = parallelism;
1924          int minActive = (short)(b & SMASK),
1925 <            maxTotal  = b >>> SWIDTH,
1926 <            active    = (int)(c >> RC_SHIFT),
1925 >            maxTotal  = (short)(b >>> SWIDTH) + pc,
1926 >            active    = (short)(c >>> RC_SHIFT),
1927              total     = (short)(c >>> TC_SHIFT),
1928 <            sp        = (int)c & ~UNSIGNALLED;
1929 <        if ((md & SMASK) == 0)
1930 <            return 0;                  // cannot compensate if parallelism zero
1931 <        else if (total >= 0) {
1932 <            if (sp != 0) {                        // activate idle worker
1933 <                WorkQueue[] qs; int n; WorkQueue v;
1934 <                if ((qs = queues) != null && (n = qs.length) > 0 &&
1935 <                    (v = qs[sp & (n - 1)]) != null) {
1936 <                    Thread vt = v.owner;
1937 <                    long nc = ((long)v.stackPred & SP_MASK) | (UC_MASK & c);
1938 <                    if (compareAndSetCtl(c, nc)) {
1939 <                        v.phase = sp;
1751 <                        LockSupport.unpark(vt);
1752 <                        return UNCOMPENSATE;
1753 <                    }
1928 >            sp        = (int)c & ~INACTIVE;
1929 >        if (runState < 0)                              // terminating
1930 >            return -1;
1931 >        else if (sp != 0 && active <= pc) {            // activate idle worker
1932 >            WorkQueue[] qs; WorkQueue v; int i;
1933 >            if (ctl == c && (qs = queues) != null &&
1934 >                qs.length > (i = sp & SMASK) && (v = qs[i]) != null) {
1935 >                long nc = (v.stackPred & SP_MASK) | (UC_MASK & c);
1936 >                if (compareAndSetCtl(c, nc)) {
1937 >                    v.phase = sp;
1938 >                    LockSupport.unpark(v.owner);
1939 >                    return UNCOMPENSATE;
1940                  }
1755                return -1;                        // retry
1756            }
1757            else if (active > minActive) {        // reduce parallelism
1758                long nc = ((RC_MASK & (c - RC_UNIT)) | (~RC_MASK & c));
1759                return compareAndSetCtl(c, nc) ? UNCOMPENSATE : -1;
1941              }
1942 +            return -1;                                  // retry
1943 +        }
1944 +        else if (active > minActive && total >= pc) {   // reduce active workers
1945 +            long nc = ((RC_MASK & (c - RC_UNIT)) | (~RC_MASK & c));
1946 +            return compareAndSetCtl(c, nc) ? UNCOMPENSATE : -1;
1947          }
1948 <        if (total < maxTotal) {                   // expand pool
1948 >        else if (total < maxTotal && total < MAX_CAP) { // expand pool
1949              long nc = ((c + TC_UNIT) & TC_MASK) | (c & ~TC_MASK);
1950              return (!compareAndSetCtl(c, nc) ? -1 :
1951                      !createWorker() ? 0 : UNCOMPENSATE);
1952          }
1953 <        else if (!compareAndSetCtl(c, c))         // validate
1953 >        else if (!compareAndSetCtl(c, c))               // validate
1954              return -1;
1955 <        else if ((sat = saturate) != null && sat.test(this))
1955 >        else if (canSaturate || ((sat = saturate) != null && sat.test(this)))
1956              return 0;
1957          else
1958              throw new RejectedExecutionException(
# Line 1781 | Line 1967 | public class ForkJoinPool extends Abstra
1967      }
1968  
1969      /**
1970 <     * Helps if possible until the given task is done.  Scans other
1971 <     * queues for a task produced by one of w's stealers; returning
1972 <     * compensated blocking sentinel if none are found.
1970 >     * Helps if possible until the given task is done.  Processes
1971 >     * compatible local tasks and scans other queues for task produced
1972 >     * by w's stealers; returning compensated blocking sentinel if
1973 >     * none are found.
1974       *
1975       * @param task the task
1976       * @param w caller's WorkQueue
1977 <     * @param canHelp if false, compensate only
1977 >     * @param timed true if this is a timed join
1978       * @return task status on exit, or UNCOMPENSATE for compensated blocking
1979       */
1980 <    final int helpJoin(ForkJoinTask<?> task, WorkQueue w, boolean canHelp) {
1981 <        int s = 0;
1982 <        if (task != null && w != null) {
1983 <            int wsrc = w.source, wid = w.config & SMASK, r = wid + 2;
1984 <            boolean scan = true;
1985 <            long c = 0L;                          // track ctl stability
1986 <            outer: for (;;) {
1987 <                if ((s = task.status) < 0)
1988 <                    break;
1989 <                else if (scan = !scan) {          // previous scan was empty
1990 <                    if (mode < 0)
1991 <                        ForkJoinTask.cancelIgnoringExceptions(task);
1992 <                    else if (c == (c = ctl) && (s = tryCompensate(c)) >= 0)
1993 <                        break;                    // block
1994 <                }
1995 <                else if (canHelp) {               // scan for subtasks
1996 <                    WorkQueue[] qs = queues;
1997 <                    int n = (qs == null) ? 0 : qs.length, m = n - 1;
1998 <                    for (int i = n; i > 0; i -= 2, r += 2) {
1999 <                        int j; WorkQueue q, x, y; ForkJoinTask<?>[] a;
2000 <                        if ((q = qs[j = r & m]) != null) {
2001 <                            int sq = q.source & SMASK, cap, b;
2002 <                            if ((a = q.array) != null && (cap = a.length) > 0) {
2003 <                                int k = (cap - 1) & (b = q.base);
2004 <                                int nextBase = b + 1, src = j | SRC, sx;
2005 <                                ForkJoinTask<?> t = WorkQueue.getSlot(a, k);
2006 <                                boolean eligible = sq == wid ||
2007 <                                    ((x = qs[sq & m]) != null &&   // indirect
2008 <                                     ((sx = (x.source & SMASK)) == wid ||
2009 <                                      ((y = qs[sx & m]) != null && // 2-indirect
2010 <                                       (y.source & SMASK) == wid)));
2011 <                                if ((s = task.status) < 0)
2012 <                                    break outer;
1826 <                                else if ((q.source & SMASK) != sq ||
1827 <                                         q.base != b)
1828 <                                    scan = true;          // inconsistent
1829 <                                else if (t == null)
1830 <                                    scan |= (a[nextBase & (cap - 1)] != null ||
1831 <                                             q.top != b); // lagging
1832 <                                else if (eligible) {
1833 <                                    if (WorkQueue.casSlotToNull(a, k, t)) {
1834 <                                        q.base = nextBase;
1835 <                                        w.source = src;
1836 <                                        t.doExec();
1837 <                                        w.source = wsrc;
1838 <                                    }
1839 <                                    scan = true;
1840 <                                    break;
1841 <                                }
1980 >    final int helpJoin(ForkJoinTask<?> task, WorkQueue w, boolean timed) {
1981 >        if (w == null || task == null)
1982 >            return 0;
1983 >        int wsrc = w.source, wid = (w.config & SMASK) | SRC, r = wid + 2;
1984 >        long sctl = 0L;                               // track stability
1985 >        for (boolean rescan = true;;) {
1986 >            int s; WorkQueue[] qs;
1987 >            if ((s = task.status) < 0)
1988 >                return s;
1989 >            if (!rescan && sctl == (sctl = ctl) &&
1990 >                (s = tryCompensate(sctl, timed)) >= 0)
1991 >                return s;                              // block
1992 >            rescan = false;
1993 >            if (runState < 0)
1994 >                return 0;
1995 >            int n = ((qs = queues) == null) ? 0 : qs.length, m = n - 1;
1996 >            scan: for (int i = n >>> 1; i > 0; --i, r += 2) {
1997 >                int j, cap; WorkQueue q; ForkJoinTask<?>[] a;
1998 >                if ((q = qs[j = r & m]) != null && (a = q.array) != null &&
1999 >                    (cap = a.length) > 0) {
2000 >                    for (int src = j | SRC;;) {
2001 >                        int sq = q.source, b = q.base;
2002 >                        int k = (cap - 1) & b, nb = b + 1;
2003 >                        ForkJoinTask<?> t = a[k];
2004 >                        U.loadFence();                // for re-reads
2005 >                        boolean eligible = true;      // check steal chain
2006 >                        for (int d = n, v = sq;;) {   // may be cyclic; bound
2007 >                            WorkQueue p;
2008 >                            if (v == wid)
2009 >                                break;
2010 >                            if (v == 0 || --d == 0 || (p = qs[v & m]) == null) {
2011 >                                eligible = false;
2012 >                                break;
2013                              }
2014 +                            v = p.source;
2015 +                        }
2016 +                        if (q.source != sq || q.base != b)
2017 +                            ;                          // stale
2018 +                        else if ((s = task.status) < 0)
2019 +                            return s;                  // recheck before taking
2020 +                        else if (t == null) {
2021 +                            if (a[k] == null) {
2022 +                                if (!rescan && eligible &&
2023 +                                    (q.array != a || q.top != b))
2024 +                                    rescan = true;     // resized or stalled
2025 +                                break;
2026 +                            }
2027 +                        }
2028 +                        else if (t != task && !eligible)
2029 +                            break;
2030 +                        else if (WorkQueue.casSlotToNull(a, k, t)) {
2031 +                            q.base = nb;
2032 +                            w.source = src;
2033 +                            t.doExec();
2034 +                            w.source = wsrc;
2035 +                            rescan = true;
2036 +                            break scan;
2037                          }
2038                      }
2039                  }
2040              }
2041          }
2042 <        return s;
1849 <    }
2042 >     }
2043  
2044      /**
2045 <     * Extra helpJoin steps for CountedCompleters.  Scans for and runs
1853 <     * subtasks of the given root task, returning if none are found.
2045 >     * Version of helpJoin for CountedCompleters.
2046       *
2047 <     * @param task root of CountedCompleter computation
2047 >     * @param task the task
2048       * @param w caller's WorkQueue
2049 <     * @param owned true if owned by a ForkJoinWorkerThread
2050 <     * @return task status on exit
2049 >     * @param owned true if w is owned by a ForkJoinWorkerThread
2050 >     * @param timed true if this is a timed join
2051 >     * @return task status on exit, or UNCOMPENSATE for compensated blocking
2052       */
2053 <    final int helpComplete(ForkJoinTask<?> task, WorkQueue w, boolean owned) {
2054 <        int s = 0;
2055 <        if (task != null && w != null) {
2056 <            int r = w.config;
2057 <            boolean scan = true, locals = true;
2058 <            long c = 0L;
2059 <            outer: for (;;) {
2060 <                if (locals) {                     // try locals before scanning
2061 <                    if ((s = w.helpComplete(task, owned, 0)) < 0)
2062 <                        break;
2063 <                    locals = false;
2064 <                }
2065 <                else if ((s = task.status) < 0)
2066 <                    break;
2067 <                else if (scan = !scan) {
2068 <                    if (c == (c = ctl))
2069 <                        break;
2070 <                }
2071 <                else {                            // scan for subtasks
2072 <                    WorkQueue[] qs = queues;
2073 <                    int n = (qs == null) ? 0 : qs.length;
2074 <                    for (int i = n; i > 0; --i, ++r) {
2075 <                        int j, cap, b; WorkQueue q; ForkJoinTask<?>[] a;
2076 <                        boolean eligible = false;
2077 <                        if ((q = qs[j = r & (n - 1)]) != null &&
2078 <                            (a = q.array) != null && (cap = a.length) > 0) {
2079 <                            int k = (cap - 1) & (b = q.base), nextBase = b + 1;
2080 <                            ForkJoinTask<?> t = WorkQueue.getSlot(a, k);
2081 <                            if (t instanceof CountedCompleter) {
2082 <                                CountedCompleter<?> f = (CountedCompleter<?>)t;
2083 <                                do {} while (!(eligible = (f == task)) &&
2084 <                                             (f = f.completer) != null);
2085 <                            }
2086 <                            if ((s = task.status) < 0)
2087 <                                break outer;
2088 <                            else if (q.base != b)
2089 <                                scan = true;       // inconsistent
1897 <                            else if (t == null)
1898 <                                scan |= (a[nextBase & (cap - 1)] != null ||
1899 <                                         q.top != b);
1900 <                            else if (eligible) {
1901 <                                if (WorkQueue.casSlotToNull(a, k, t)) {
1902 <                                    q.setBaseOpaque(nextBase);
1903 <                                    t.doExec();
1904 <                                    locals = true;
1905 <                                }
1906 <                                scan = true;
2053 >    final int helpComplete(ForkJoinTask<?> task, WorkQueue w, boolean owned,
2054 >                           boolean timed) {
2055 >        if (w == null || task == null)
2056 >            return 0;
2057 >        int wsrc = w.source, r = w.config;
2058 >        long sctl = 0L;                               // track stability
2059 >        for (boolean rescan = true;;) {
2060 >            int s; WorkQueue[] qs;
2061 >            if ((s = w.helpComplete(task, owned, 0)) < 0)
2062 >                return s;
2063 >            if (!rescan && sctl == (sctl = ctl)) {
2064 >                if (!owned)
2065 >                    return 0;
2066 >                if ((s = tryCompensate(sctl, timed)) >= 0)
2067 >                    return s;
2068 >            }
2069 >            rescan = false;
2070 >            if (runState < 0)
2071 >                return 0;
2072 >            int n = ((qs = queues) == null) ? 0 : qs.length, m = n - 1;
2073 >            scan: for (int i = n; i > 0; --i, ++r) {
2074 >                int j, cap; WorkQueue q; ForkJoinTask<?>[] a;
2075 >                if ((q = qs[j = r & m]) != null && (a = q.array) != null &&
2076 >                    (cap = a.length) > 0) {
2077 >                    poll: for (int src = j | SRC, b = q.base;;) {
2078 >                        int k = (cap - 1) & b, nb = b + 1;
2079 >                        ForkJoinTask<?> t = a[k];
2080 >                        U.loadFence();                // for re-reads
2081 >                        if (b != (b = q.base))
2082 >                            ;                         // stale
2083 >                        else if ((s = task.status) < 0)
2084 >                            return s;                 // recheck before taking
2085 >                        else if (t == null) {
2086 >                            if (a[k] == null) {
2087 >                                if (!rescan &&        // resized or stalled
2088 >                                    (q.array != a || q.top != b))
2089 >                                    rescan = true;
2090                                  break;
2091                              }
2092                          }
2093 +                        else if (t instanceof CountedCompleter) {
2094 +                            CountedCompleter<?> f;
2095 +                            for (f = (CountedCompleter<?>)t;;) {
2096 +                                if (f == task)
2097 +                                    break;
2098 +                                else if ((f = f.completer) == null)
2099 +                                    break poll;       // ineligible
2100 +                            }
2101 +                            if (WorkQueue.casSlotToNull(a, k, t)) {
2102 +                                q.base = nb;
2103 +                                w.source = src;
2104 +                                t.doExec();
2105 +                                w.source = wsrc;
2106 +                                rescan = true;
2107 +                                break scan;
2108 +                            }
2109 +                        }
2110 +                        else
2111 +                            break;
2112                      }
2113                  }
2114              }
2115          }
2116 <        return s;
1915 <    }
1916 <
1917 <    /**
1918 <     * Scans for and returns a polled task, if available.  Used only
1919 <     * for untracked polls. Begins scan at an index (scanRover)
1920 <     * advanced on each call, to avoid systematic unfairness.
1921 <     *
1922 <     * @param submissionsOnly if true, only scan submission queues
1923 <     */
1924 <    private ForkJoinTask<?> pollScan(boolean submissionsOnly) {
1925 <        VarHandle.acquireFence();
1926 <        int r = scanRover += 0x61c88647; // Weyl increment; raciness OK
1927 <        if (submissionsOnly)             // even indices only
1928 <            r &= ~1;
1929 <        int step = (submissionsOnly) ? 2 : 1;
1930 <        WorkQueue[] qs; int n;
1931 <        while ((qs = queues) != null && (n = qs.length) > 0) {
1932 <            boolean scan = false;
1933 <            for (int i = 0; i < n; i += step) {
1934 <                int j, cap, b; WorkQueue q; ForkJoinTask<?>[] a;
1935 <                if ((q = qs[j = (n - 1) & (r + i)]) != null &&
1936 <                    (a = q.array) != null && (cap = a.length) > 0) {
1937 <                    int k = (cap - 1) & (b = q.base), nextBase = b + 1;
1938 <                    ForkJoinTask<?> t = WorkQueue.getSlot(a, k);
1939 <                    if (q.base != b)
1940 <                        scan = true;
1941 <                    else if (t == null)
1942 <                        scan |= (q.top != b || a[nextBase & (cap - 1)] != null);
1943 <                    else if (!WorkQueue.casSlotToNull(a, k, t))
1944 <                        scan = true;
1945 <                    else {
1946 <                        q.setBaseOpaque(nextBase);
1947 <                        return t;
1948 <                    }
1949 <                }
1950 <            }
1951 <            if (!scan && queues == qs)
1952 <                break;
1953 <        }
1954 <        return null;
1955 <    }
2116 >     }
2117  
2118      /**
2119       * Runs tasks until {@code isQuiescent()}. Rather than blocking
# Line 1963 | Line 2124 | public class ForkJoinPool extends Abstra
2124       * @param interruptible true if return on interrupt
2125       * @return positive if quiescent, negative if interrupted, else 0
2126       */
2127 <    final int helpQuiescePool(WorkQueue w, long nanos, boolean interruptible) {
1967 <        if (w == null)
1968 <            return 0;
2127 >    private int helpQuiesce(WorkQueue w, long nanos, boolean interruptible) {
2128          long startTime = System.nanoTime(), parkTime = 0L;
2129 <        int prevSrc = w.source, wsrc = prevSrc, cfg = w.config, r = cfg + 1;
2130 <        for (boolean active = true, locals = true;;) {
2131 <            boolean busy = false, scan = false;
2132 <            if (locals) {  // run local tasks before (re)polling
2133 <                locals = false;
2134 <                for (ForkJoinTask<?> u; (u = w.nextLocalTask(cfg)) != null;)
2129 >        int phase; // w.phase set negative when temporarily quiescent
2130 >        if (w == null || (phase = w.phase) < 0)
2131 >            return 0;
2132 >        int activePhase = phase, inactivePhase = phase | INACTIVE;
2133 >        int wsrc = w.source, r = 0;
2134 >        for (boolean locals = true;;) {
2135 >            WorkQueue[] qs; WorkQueue q;
2136 >            if (runState < 0) {             // terminating
2137 >                w.phase = activePhase;
2138 >                return 1;
2139 >            }
2140 >            if (locals) {                   // run local tasks before (re)polling
2141 >                for (ForkJoinTask<?> u; (u = w.nextLocalTask()) != null;)
2142                      u.doExec();
2143              }
2144 <            WorkQueue[] qs = queues;
2145 <            int n = (qs == null) ? 0 : qs.length;
2146 <            for (int i = n; i > 0; --i, ++r) {
2147 <                int j, b, cap; WorkQueue q; ForkJoinTask<?>[] a;
2148 <                if ((q = qs[j = (n - 1) & r]) != null && q != w &&
2149 <                    (a = q.array) != null && (cap = a.length) > 0) {
2150 <                    int k = (cap - 1) & (b = q.base);
2151 <                    int nextBase = b + 1, src = j | SRC;
2152 <                    ForkJoinTask<?> t = WorkQueue.getSlot(a, k);
2153 <                    if (q.base != b)
2154 <                        busy = scan = true;
2155 <                    else if (t != null) {
2156 <                        busy = scan = true;
2157 <                        if (!active) {    // increment before taking
2158 <                            active = true;
2159 <                            getAndAddCtl(RC_UNIT);
2144 >            boolean rescan = false, busy = locals = false, interrupted;
2145 >            int n = ((qs = queues) == null) ? 0 : qs.length, m = n - 1;
2146 >            scan: for (int i = n, j; i > 0; --i, ++r) {
2147 >                if ((q = qs[j = m & r]) != null && q != w) {
2148 >                    for (int src = j | SRC;;) {
2149 >                        ForkJoinTask<?>[] a = q.array;
2150 >                        int b = q.base, cap;
2151 >                        if (a == null || (cap = a.length) <= 0)
2152 >                            break;
2153 >                        int k = (cap - 1) & b, nb = b + 1, nk = (cap - 1) & nb;
2154 >                        ForkJoinTask<?> t = a[k];
2155 >                        U.loadFence();      // for re-reads
2156 >                        if (q.base != b || q.array != a || a[k] != t)
2157 >                            ;
2158 >                        else if (t == null) {
2159 >                            if (!rescan) {
2160 >                                if (a[nk] != null || q.top - b > 0)
2161 >                                    rescan = true;
2162 >                                else if (!busy &&
2163 >                                         q.owner != null && q.phase >= 0)
2164 >                                    busy = true;
2165 >                            }
2166 >                            break;
2167                          }
2168 <                        if (WorkQueue.casSlotToNull(a, k, t)) {
2169 <                            q.base = nextBase;
2168 >                        else if (phase < 0) // reactivate before taking
2169 >                            w.phase = phase = activePhase;
2170 >                        else if (WorkQueue.casSlotToNull(a, k, t)) {
2171 >                            q.base = nb;
2172                              w.source = src;
2173                              t.doExec();
2174 <                            w.source = wsrc = prevSrc;
2175 <                            locals = true;
2174 >                            w.source = wsrc;
2175 >                            rescan = locals = true;
2176 >                            break scan;
2177                          }
2002                        break;
2003                    }
2004                    else if (!busy) {
2005                        if (q.top != b || a[nextBase & (cap - 1)] != null)
2006                            busy = scan = true;
2007                        else if (q.source != QUIET && q.phase >= 0)
2008                            busy = true;
2178                      }
2179                  }
2180              }
2181 <            VarHandle.acquireFence();
2182 <            if (!scan && queues == qs) {
2183 <                boolean interrupted;
2184 <                if (!busy) {
2185 <                    w.source = prevSrc;
2186 <                    if (!active)
2187 <                        getAndAddCtl(RC_UNIT);
2188 <                    return 1;
2189 <                }
2190 <                if (wsrc != QUIET)
2191 <                    w.source = wsrc = QUIET;
2192 <                if (active) {                 // decrement
2193 <                    active = false;
2194 <                    parkTime = 0L;
2195 <                    getAndAddCtl(RC_MASK & -RC_UNIT);
2196 <                }
2197 <                else if (parkTime == 0L) {
2198 <                    parkTime = 1L << 10; // initially about 1 usec
2199 <                    Thread.yield();
2200 <                }
2201 <                else if ((interrupted = interruptible && Thread.interrupted()) ||
2202 <                         System.nanoTime() - startTime > nanos) {
2203 <                    getAndAddCtl(RC_UNIT);
2035 <                    return interrupted ? -1 : 0;
2036 <                }
2037 <                else {
2038 <                    LockSupport.parkNanos(this, parkTime);
2039 <                    if (parkTime < nanos >>> 8 && parkTime < 1L << 20)
2040 <                        parkTime <<= 1;  // max sleep approx 1 sec or 1% nanos
2041 <                }
2181 >            if (rescan)
2182 >                ;                   // retry
2183 >            else if (phase >= 0) {
2184 >                parkTime = 0L;
2185 >                w.phase = phase = inactivePhase;
2186 >            }
2187 >            else if (!busy) {
2188 >                w.phase = activePhase;
2189 >                return 1;
2190 >            }
2191 >            else if (parkTime == 0L) {
2192 >                parkTime = 1L << 10; // initially about 1 usec
2193 >                Thread.yield();
2194 >            }
2195 >            else if ((interrupted = interruptible && Thread.interrupted()) ||
2196 >                     System.nanoTime() - startTime > nanos) {
2197 >                w.phase = activePhase;
2198 >                return interrupted ? -1 : 0;
2199 >            }
2200 >            else {
2201 >                LockSupport.parkNanos(this, parkTime);
2202 >                if (parkTime < nanos >>> 8 && parkTime < 1L << 20)
2203 >                    parkTime <<= 1;  // max sleep approx 1 sec or 1% nanos
2204              }
2205          }
2206      }
# Line 2050 | Line 2212 | public class ForkJoinPool extends Abstra
2212       * @param interruptible true if return on interrupt
2213       * @return positive if quiescent, negative if interrupted, else 0
2214       */
2215 <    final int externalHelpQuiescePool(long nanos, boolean interruptible) {
2215 >    private int externalHelpQuiesce(long nanos, boolean interruptible) {
2216          for (long startTime = System.nanoTime(), parkTime = 0L;;) {
2217              ForkJoinTask<?> t;
2218              if ((t = pollScan(false)) != null) {
# Line 2076 | Line 2238 | public class ForkJoinPool extends Abstra
2238      }
2239  
2240      /**
2241 +     * Helps quiesce from either an internal or external caller.
2242 +     *
2243 +     * @param pool the pool to use, or null if any
2244 +     * @param nanos max wait time (Long.MAX_VALUE if effectively untimed)
2245 +     * @param interruptible true if return on interrupt
2246 +     * @return positive if quiescent, negative if interrupted, else 0
2247 +     */
2248 +    final static int helpQuiescePool(ForkJoinPool pool, long nanos,
2249 +                                     boolean interruptible) {
2250 +        Thread t; ForkJoinPool p; ForkJoinWorkerThread wt;
2251 +        if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread &&
2252 +            (p = (wt = (ForkJoinWorkerThread)t).pool) != null &&
2253 +            (p == pool || pool == null))
2254 +            return p.helpQuiesce(wt.workQueue, nanos, interruptible);
2255 +        else if ((p = pool) != null || (p = common) != null)
2256 +            return p.externalHelpQuiesce(nanos, interruptible);
2257 +        else
2258 +            return 0;
2259 +    }
2260 +
2261 +    /**
2262       * Gets and removes a local or stolen task for the given worker.
2263       *
2264       * @return a task, if available
2265       */
2266      final ForkJoinTask<?> nextTaskFor(WorkQueue w) {
2267          ForkJoinTask<?> t;
2268 <        if (w == null || (t = w.nextLocalTask(w.config)) == null)
2268 >        if (w == null || (t = w.nextLocalTask()) == null)
2269              t = pollScan(false);
2270          return t;
2271      }
# Line 2091 | Line 2274 | public class ForkJoinPool extends Abstra
2274  
2275      /**
2276       * Finds and locks a WorkQueue for an external submitter, or
2277 <     * returns null if shutdown or terminating.
2277 >     * throws RejectedExecutionException if shutdown or terminating.
2278 >     * @param isSubmit false if this is for a common pool fork
2279       */
2280 <    final WorkQueue submissionQueue() {
2280 >    final WorkQueue submissionQueue(boolean isSubmit) {
2281          int r;
2282 +        ReentrantLock lock = registrationLock;
2283          if ((r = ThreadLocalRandom.getProbe()) == 0) {
2284              ThreadLocalRandom.localInit();           // initialize caller's probe
2285              r = ThreadLocalRandom.getProbe();
2286          }
2287 <        for (int id = r << 1;;) {                    // even indices only
2288 <            int md = mode, n, i; WorkQueue q; ReentrantLock lock;
2289 <            WorkQueue[] qs = queues;
2290 <            if ((md & SHUTDOWN) != 0 || qs == null || (n = qs.length) <= 0)
2291 <                return null;
2292 <            else if ((q = qs[i = (n - 1) & id]) == null) {
2293 <                if ((lock = registrationLock) != null) {
2294 <                    WorkQueue w = new WorkQueue(id | SRC);
2295 <                    lock.lock();                    // install under lock
2296 <                    if (qs[i] == null)
2297 <                        qs[i] = w;                  // else lost race; discard
2287 >        if (lock != null) {                          // else init error
2288 >            for (int id = r << 1;;) {                // even indices only
2289 >                int n, i; WorkQueue[] qs; WorkQueue q;
2290 >                if ((qs = queues) == null || (n = qs.length) <= 0)
2291 >                    break;
2292 >                else if ((q = qs[i = (n - 1) & id]) == null) {
2293 >                    WorkQueue w = new WorkQueue(null, id | SRC);
2294 >                    w.array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
2295 >                    lock.lock();                     // install under lock
2296 >                    if (queues == qs && qs[i] == null)
2297 >                        qs[i] = w;                   // else lost race; discard
2298                      lock.unlock();
2299                  }
2300 +                else if (q.getAndSetAccess(1) != 0)  // move and restart
2301 +                    id = (r = ThreadLocalRandom.advanceProbe(r)) << 1;
2302 +                else if (isSubmit && runState != 0) {
2303 +                    q.access = 0;                    // check while lock held
2304 +                    break;
2305 +                }
2306 +                else
2307 +                    return q;
2308              }
2116            else if (!q.tryLock())                  // move and restart
2117                id = (r = ThreadLocalRandom.advanceProbe(r)) << 1;
2118            else
2119                return q;
2309          }
2310 +        throw new RejectedExecutionException();
2311      }
2312  
2313      /**
2314 <     * Adds the given task to an external submission queue, or throws
2315 <     * exception if shutdown or terminating.
2126 <     *
2127 <     * @param task the task. Caller must ensure non-null.
2128 <     */
2129 <    final void externalPush(ForkJoinTask<?> task) {
2130 <        WorkQueue q;
2131 <        if ((q = submissionQueue()) == null)
2132 <            throw new RejectedExecutionException(); // shutdown or disabled
2133 <        else if (q.lockedPush(task))
2134 <            signalWork();
2135 <    }
2136 <
2137 <    // relay from ForkJoinTask to avoid unnecessary initialization
2138 <    static final void externalPushCommon(ForkJoinTask<?> task) {
2139 <        Common.pool.externalPush(task);
2140 <    }
2141 <
2142 <    /**
2143 <     * Pushes a possibly-external submission.
2314 >     * Pushes a submission to the pool, using internal queue if called
2315 >     * from ForkJoinWorkerThread, else external queue.
2316       */
2317 <    private <T> ForkJoinTask<T> externalSubmit(ForkJoinTask<T> task) {
2318 <        Thread t; ForkJoinWorkerThread wt; WorkQueue q;
2319 <        if (task == null)
2320 <            throw new NullPointerException();
2317 >    private <T> ForkJoinTask<T> poolSubmit(boolean signalIfEmpty,
2318 >                                           ForkJoinTask<T> task) {
2319 >        WorkQueue q; Thread t; ForkJoinWorkerThread wt;
2320 >        if (task == null) throw new NullPointerException();
2321          if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
2322 <            (q = (wt = (ForkJoinWorkerThread)t).workQueue) != null &&
2323 <            wt.pool == this)
2324 <            q.push(task, this);
2325 <        else if ((q = submissionQueue()) == null)
2326 <            throw new RejectedExecutionException(); // shutdown or disabled
2327 <        else if (q.lockedPush(task))
2328 <            signalWork();
2322 >            (wt = (ForkJoinWorkerThread)t).pool == this)
2323 >            q = wt.workQueue;
2324 >        else {
2325 >            task.markPoolSubmission();
2326 >            q = submissionQueue(true);
2327 >        }
2328 >        q.push(task, this, signalIfEmpty);
2329          return task;
2330      }
2331  
2332      /**
2333 <     * Pushes a possibly-external task without signalling. currently unused
2333 >     * Returns queue for an external thread, if one exists that has
2334 >     * possibly ever submitted to the given pool (nonzero probe), or
2335 >     * null if none.
2336       */
2337 <    /*
2338 <    private <T> ForkJoinTask<T> unsignalledSubmit(ForkJoinTask<T> task) {
2339 <        Thread t; ForkJoinWorkerThread wt; WorkQueue q;
2340 <        if (task == null)
2341 <            throw new NullPointerException();
2342 <        if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
2169 <            (q = (wt = (ForkJoinWorkerThread)t).workQueue) != null &&
2170 <            wt.pool == this)
2171 <            q.push(task, null);
2172 <        else if ((q = submissionQueue()) == null)
2173 <            throw new RejectedExecutionException(); // shutdown or disabled
2174 <        else
2175 <            q.lockedPush(task);
2176 <        return task;
2337 >    private static WorkQueue externalQueue(ForkJoinPool p) {
2338 >        WorkQueue[] qs;
2339 >        int r = ThreadLocalRandom.getProbe(), n;
2340 >        return (p != null && (qs = p.queues) != null &&
2341 >                (n = qs.length) > 0 && r != 0) ?
2342 >            qs[(n - 1) & (r << 1)] : null;
2343      }
2178    */
2344  
2345      /**
2346 <     * Returns common pool queue for an external thread that has
2182 <     * possibly ever submitted a common pool task (nonzero probe), or
2183 <     * null if none.
2346 >     * Returns external queue for common pool.
2347       */
2348      static WorkQueue commonQueue() {
2349 <        ForkJoinPool p; WorkQueue[] qs;
2187 <        int r = ThreadLocalRandom.getProbe(), n;
2188 <        return ((p = Common.pool) != null && (qs = p.queues) != null &&
2189 <                (n = qs.length) > 0 && r != 0) ?
2190 <            qs[(n - 1) & (r << 1)] : null;
2349 >        return externalQueue(common);
2350      }
2351  
2352      /**
2353       * Returns queue for an external thread, if one exists
2354       */
2355      final WorkQueue externalQueue() {
2356 <        WorkQueue[] qs;
2198 <        int r = ThreadLocalRandom.getProbe(), n;
2199 <        return ((qs = queues) != null && (n = qs.length) > 0 && r != 0) ?
2200 <            qs[(n - 1) & (r << 1)] : null;
2356 >        return externalQueue(this);
2357      }
2358  
2359      /**
# Line 2264 | Line 2420 | public class ForkJoinPool extends Abstra
2420          if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
2421              (pool = (wt = (ForkJoinWorkerThread)t).pool) != null &&
2422              (q = wt.workQueue) != null) {
2267            int p = pool.mode & SMASK;
2268            int a = p + (int)(pool.ctl >> RC_SHIFT);
2423              int n = q.top - q.base;
2424 +            int p = pool.parallelism;
2425 +            int a = (short)(pool.ctl >>> RC_SHIFT);
2426              return n - (a > (p >>>= 1) ? 0 :
2427                          a > (p >>>= 1) ? 1 :
2428                          a > (p >>>= 1) ? 2 :
# Line 2279 | Line 2435 | public class ForkJoinPool extends Abstra
2435      // Termination
2436  
2437      /**
2438 <     * Possibly initiates and/or completes termination.
2438 >     * Possibly initiates and/or completes pool termination.
2439       *
2440       * @param now if true, unconditionally terminate, else only
2441       * if no work and no active workers
# Line 2287 | Line 2443 | public class ForkJoinPool extends Abstra
2443       * @return true if terminating or terminated
2444       */
2445      private boolean tryTerminate(boolean now, boolean enable) {
2446 <        int md; // try to set SHUTDOWN, then STOP, then help terminate
2447 <        if (((md = mode) & UNSTOPPABLE) != 0)
2448 <            return false;                // for common pool
2449 <        if ((md & SHUTDOWN) == 0) {
2450 <            if (!enable)
2451 <                return false;
2452 <            md = getAndBitwiseOrMode(SHUTDOWN);
2453 <        }
2454 <        if ((md & STOP) == 0) {
2455 <            if (!now && !canStop())
2456 <                return false;
2457 <            md = getAndBitwiseOrMode(STOP);
2458 <        }
2459 <        for (boolean rescan = true;;) { // repeat until no changes
2460 <            boolean changed = false;
2461 <            for (ForkJoinTask<?> t; (t = pollScan(false)) != null; ) {
2462 <                changed = true;
2463 <                ForkJoinTask.cancelIgnoringExceptions(t); // help cancel
2464 <            }
2465 <            WorkQueue[] qs; int n; WorkQueue q; Thread thread;
2466 <            if ((qs = queues) != null && (n = qs.length) > 0) {
2467 <                for (int j = 1; j < n; j += 2) { // unblock other workers
2468 <                    if ((q = qs[j]) != null && (thread = q.owner) != null &&
2469 <                        !thread.isInterrupted()) {
2470 <                        changed = true;
2446 >        int rs; ReentrantLock lock; Condition cond;
2447 >        if ((rs = runState) >= 0) {                 // set SHUTDOWN and/or STOP
2448 >            if ((config & ISCOMMON) != 0)
2449 >                return false;                       // cannot shutdown
2450 >            if (!now) {
2451 >                if ((rs & SHUTDOWN) == 0) {
2452 >                    if (!enable)
2453 >                        return false;
2454 >                    getAndBitwiseOrRunState(SHUTDOWN);
2455 >                }
2456 >                if (!canStop())
2457 >                    return false;
2458 >            }
2459 >            getAndBitwiseOrRunState(SHUTDOWN | STOP);
2460 >        }
2461 >        WorkQueue released = reactivate();          // try signalling waiter
2462 >        int tc = (short)(ctl >>> TC_SHIFT);
2463 >        if (released == null && tc > 0) {           // help unblock and cancel
2464 >            Thread current = Thread.currentThread();
2465 >            WorkQueue w = ((current instanceof ForkJoinWorkerThread) ?
2466 >                           ((ForkJoinWorkerThread)current).workQueue : null);
2467 >            int r = (w == null) ? 0 : w.config + 1; // stagger traversals
2468 >            WorkQueue[] qs = queues;
2469 >            int n = (qs == null) ? 0 : qs.length;
2470 >            for (int i = 0; i < n; ++i) {
2471 >                WorkQueue q; Thread thread;
2472 >                if ((q = qs[(r + i) & (n - 1)]) != null &&
2473 >                    (thread = q.owner) != current && q.access != STOP) {
2474 >                    if (thread != null && !thread.isInterrupted()) {
2475 >                        q.forcePhaseActive();      // for awaitWork
2476                          try {
2477                              thread.interrupt();
2478                          } catch (Throwable ignore) {
2479                          }
2480                      }
2481 +                    for (ForkJoinTask<?> t; (t = q.poll(null)) != null; )
2482 +                        ForkJoinTask.cancelIgnoringExceptions(t);
2483                  }
2484              }
2485 <            ReentrantLock lock; Condition cond; // signal when no workers
2486 <            if (((md = mode) & TERMINATED) == 0 &&
2487 <                (md & SMASK) + (short)(ctl >>> TC_SHIFT) <= 0 &&
2488 <                (getAndBitwiseOrMode(TERMINATED) & TERMINATED) == 0 &&
2489 <                (lock = registrationLock) != null) {
2490 <                lock.lock();
2491 <                if ((cond = termination) != null)
2492 <                    cond.signalAll();
2493 <                lock.unlock();
2331 <            }
2332 <            if (changed)
2333 <                rescan = true;
2334 <            else if (rescan)
2335 <                rescan = false;
2336 <            else
2337 <                break;
2485 >        }
2486 >        if ((tc <= 0 || (short)(ctl >>> TC_SHIFT) <= 0) &&
2487 >            (getAndBitwiseOrRunState(TERMINATED) & TERMINATED) == 0 &&
2488 >            (lock = registrationLock) != null) {
2489 >            lock.lock();                            // signal when no workers
2490 >            if ((cond = termination) != null)
2491 >                cond.signalAll();
2492 >            lock.unlock();
2493 >            // container.close(); // for loom
2494          }
2495          return true;
2496      }
# Line 2510 | Line 2666 | public class ForkJoinPool extends Abstra
2666              throw new IllegalArgumentException();
2667          if (factory == null || unit == null)
2668              throw new NullPointerException();
2669 +        this.parallelism = p;
2670          this.factory = factory;
2671          this.ueh = handler;
2672          this.saturate = saturate;
2673 +        this.config = asyncMode ? FIFO : 0;
2674          this.keepAlive = Math.max(unit.toMillis(keepAliveTime), TIMEOUT_SLOP);
2517        int size = 1 << (33 - Integer.numberOfLeadingZeros(p - 1));
2675          int corep = Math.min(Math.max(corePoolSize, p), MAX_CAP);
2676 <        int maxSpares = Math.min(maximumPoolSize, MAX_CAP) - p;
2677 <        int minAvail = Math.min(Math.max(minimumRunnable, 0), MAX_CAP);
2678 <        this.bounds = ((minAvail - p) & SMASK) | (maxSpares << SWIDTH);
2679 <        this.mode = p | (asyncMode ? FIFO : 0);
2680 <        this.ctl = ((((long)(-corep) << TC_SHIFT) & TC_MASK) |
2524 <                    (((long)(-p)     << RC_SHIFT) & RC_MASK));
2676 >        int maxSpares = Math.max(0, Math.min(maximumPoolSize - p, MAX_CAP));
2677 >        int minAvail = Math.max(0, Math.min(minimumRunnable, MAX_CAP));
2678 >        this.bounds = (long)(minAvail & SMASK) | (long)(maxSpares << SWIDTH) |
2679 >            ((long)corep << 32);
2680 >        int size = 1 << (33 - Integer.numberOfLeadingZeros(p - 1));
2681          this.registrationLock = new ReentrantLock();
2682          this.queues = new WorkQueue[size];
2683          String pid = Integer.toString(getAndAddPoolIds(1) + 1);
2684 <        this.workerNamePrefix = "ForkJoinPool-" + pid + "-worker-";
2685 <    }
2686 <
2531 <    // helper method for commonPool constructor
2532 <    private static Object newInstanceFromSystemProperty(String property)
2533 <        throws ReflectiveOperationException {
2534 <        String className = System.getProperty(property);
2535 <        return (className == null)
2536 <            ? null
2537 <            : ClassLoader.getSystemClassLoader().loadClass(className)
2538 <            .getConstructor().newInstance();
2684 >        String name = "ForkJoinPool-" + pid;
2685 >        this.workerNamePrefix = name + "-worker-";
2686 >        //        this.container = SharedThreadContainer.create(name); // for loom
2687      }
2688  
2689      /**
# Line 2543 | Line 2691 | public class ForkJoinPool extends Abstra
2691       * overridden by system properties
2692       */
2693      private ForkJoinPool(byte forCommonPoolOnly) {
2694 <        int parallelism = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);
2547 <        int maxSpares = DEFAULT_COMMON_MAX_SPARES;
2548 <        ForkJoinWorkerThreadFactory fac = null;
2694 >        ForkJoinWorkerThreadFactory fac = defaultForkJoinWorkerThreadFactory;
2695          UncaughtExceptionHandler handler = null;
2696 +        int maxSpares = DEFAULT_COMMON_MAX_SPARES;
2697 +        int pc = 0, preset = 0; // nonzero if size set as property
2698          try {  // ignore exceptions in accessing/parsing properties
2551            fac = (ForkJoinWorkerThreadFactory) newInstanceFromSystemProperty(
2552                "java.util.concurrent.ForkJoinPool.common.threadFactory");
2553            handler = (UncaughtExceptionHandler) newInstanceFromSystemProperty(
2554                "java.util.concurrent.ForkJoinPool.common.exceptionHandler");
2699              String pp = System.getProperty
2700                  ("java.util.concurrent.ForkJoinPool.common.parallelism");
2701 <            String msp = System.getProperty
2701 >            if (pp != null) {
2702 >                pc = Math.max(0, Integer.parseInt(pp));
2703 >                preset = PRESET_SIZE;
2704 >            }
2705 >            String ms = System.getProperty
2706                  ("java.util.concurrent.ForkJoinPool.common.maximumSpares");
2707 <            if (pp != null)
2708 <                parallelism = Integer.parseInt(pp);
2709 <            if (msp != null)
2710 <                maxSpares = Integer.parseInt(msp);
2707 >            if (ms != null)
2708 >                maxSpares = Math.max(0, Math.min(MAX_CAP, Integer.parseInt(ms)));
2709 >            String sf = System.getProperty
2710 >                ("java.util.concurrent.ForkJoinPool.common.threadFactory");
2711 >            String sh = System.getProperty
2712 >                ("java.util.concurrent.ForkJoinPool.common.exceptionHandler");
2713 >            if (sf != null || sh != null) {
2714 >                ClassLoader ldr = ClassLoader.getSystemClassLoader();
2715 >                if (sf != null)
2716 >                    fac = (ForkJoinWorkerThreadFactory)
2717 >                        ldr.loadClass(sf).getConstructor().newInstance();
2718 >                if (sh != null)
2719 >                    handler = (UncaughtExceptionHandler)
2720 >                        ldr.loadClass(sh).getConstructor().newInstance();
2721 >            }
2722          } catch (Exception ignore) {
2723          }
2724 +        if (preset == 0)
2725 +            pc = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);
2726 +        int p = Math.min(pc, MAX_CAP);
2727 +        int size = (p == 0) ? 1 : 1 << (33 - Integer.numberOfLeadingZeros(p-1));
2728 +        this.parallelism = p;
2729 +        this.config = ISCOMMON | preset;
2730 +        this.bounds = (long)(1 | (maxSpares << SWIDTH));
2731 +        this.factory = fac;
2732          this.ueh = handler;
2733          this.keepAlive = DEFAULT_KEEPALIVE;
2734          this.saturate = null;
2735          this.workerNamePrefix = null;
2569        int p = Math.min(Math.max(parallelism, 0), MAX_CAP), size;
2570        this.mode = p | UNSTOPPABLE;
2571        if (p > 0) {
2572            size = 1 << (33 - Integer.numberOfLeadingZeros(p - 1));
2573            this.bounds = ((1 - p) & SMASK) | (maxSpares << SWIDTH);
2574            this.ctl = ((((long)(-p) << TC_SHIFT) & TC_MASK) |
2575                        (((long)(-p) << RC_SHIFT) & RC_MASK));
2576        } else {  // zero min, max, spare counts, 1 slot
2577            size = 1;
2578            this.bounds = 0;
2579            this.ctl = 0L;
2580        }
2581        this.factory = (fac != null) ? fac :
2582            new DefaultCommonPoolForkJoinWorkerThreadFactory();
2583        this.queues = new WorkQueue[size];
2736          this.registrationLock = new ReentrantLock();
2737 <    }
2738 <
2587 <    /**
2588 <     * Singleton class for the Common Pool to reduce static loading
2589 <     * dependencies
2590 <     */
2591 <    static final class Common {
2592 <        /**
2593 <         * Common (static) pool. Non-null for public use unless a static
2594 <         * construction exception, but internal usages null-check on use
2595 <         * to paranoically avoid potential initialization circularities
2596 <         * as well as to simplify generated code.
2597 <         */
2598 <        static final ForkJoinPool pool;
2599 <
2600 <        /**
2601 <         * Common pool parallelism. To allow simpler use and management
2602 <         * when common pool threads are disabled, we allow the underlying
2603 <         * common.parallelism field to be zero, but in that case still report
2604 <         * parallelism as 1 to reflect resulting caller-runs mechanics.
2605 <         */
2606 <        static final int parallelism;
2607 <
2608 <        static {
2609 <            @SuppressWarnings("removal")
2610 <            ForkJoinPool p =
2611 <                AccessController.doPrivileged(new PrivilegedAction<>() {
2612 <                    public ForkJoinPool run() {
2613 <                        return new ForkJoinPool((byte)0); }});
2614 <            pool = p;
2615 <            parallelism = Math.max(p.mode & SMASK, 1);
2616 <        }
2737 >        this.queues = new WorkQueue[size];
2738 >        //        this.container = SharedThreadContainer.create("ForkJoinPool.commonPool"); // for loom
2739      }
2740  
2741      /**
# Line 2630 | Line 2752 | public class ForkJoinPool extends Abstra
2752       * @since 1.8
2753       */
2754      public static ForkJoinPool commonPool() {
2755 <        return Common.pool;
2755 >        // assert common != null : "static init error";
2756 >        return common;
2757      }
2758  
2759      // Execution methods
# Line 2653 | Line 2776 | public class ForkJoinPool extends Abstra
2776       *         scheduled for execution
2777       */
2778      public <T> T invoke(ForkJoinTask<T> task) {
2779 <        externalSubmit(task);
2780 <        return task.joinForPoolInvoke(this);
2779 >        poolSubmit(true, task);
2780 >        return task.join();
2781      }
2782  
2783      /**
# Line 2666 | Line 2789 | public class ForkJoinPool extends Abstra
2789       *         scheduled for execution
2790       */
2791      public void execute(ForkJoinTask<?> task) {
2792 <        externalSubmit(task);
2792 >        poolSubmit(true, task);
2793      }
2794  
2795      // AbstractExecutorService methods
# Line 2679 | Line 2802 | public class ForkJoinPool extends Abstra
2802      @Override
2803      @SuppressWarnings("unchecked")
2804      public void execute(Runnable task) {
2805 <        externalSubmit((task instanceof ForkJoinTask<?>)
2806 <                       ? (ForkJoinTask<Void>) task // avoid re-wrap
2807 <                       : new ForkJoinTask.RunnableExecuteAction(task));
2805 >        poolSubmit(true, (task instanceof ForkJoinTask<?>)
2806 >                   ? (ForkJoinTask<Void>) task // avoid re-wrap
2807 >                   : new ForkJoinTask.RunnableExecuteAction(task));
2808      }
2809  
2810      /**
# Line 2695 | Line 2818 | public class ForkJoinPool extends Abstra
2818       *         scheduled for execution
2819       */
2820      public <T> ForkJoinTask<T> submit(ForkJoinTask<T> task) {
2821 <        return externalSubmit(task);
2821 >        return poolSubmit(true, task);
2822      }
2823  
2824      /**
# Line 2705 | Line 2828 | public class ForkJoinPool extends Abstra
2828       */
2829      @Override
2830      public <T> ForkJoinTask<T> submit(Callable<T> task) {
2831 <        return externalSubmit(new ForkJoinTask.AdaptedCallable<T>(task));
2831 >        return poolSubmit(true, new ForkJoinTask.AdaptedCallable<T>(task));
2832      }
2833  
2834      /**
# Line 2715 | Line 2838 | public class ForkJoinPool extends Abstra
2838       */
2839      @Override
2840      public <T> ForkJoinTask<T> submit(Runnable task, T result) {
2841 <        return externalSubmit(new ForkJoinTask.AdaptedRunnable<T>(task, result));
2841 >        return poolSubmit(true, new ForkJoinTask.AdaptedRunnable<T>(task, result));
2842      }
2843  
2844      /**
# Line 2726 | Line 2849 | public class ForkJoinPool extends Abstra
2849      @Override
2850      @SuppressWarnings("unchecked")
2851      public ForkJoinTask<?> submit(Runnable task) {
2852 <        return externalSubmit((task instanceof ForkJoinTask<?>)
2853 <            ? (ForkJoinTask<Void>) task // avoid re-wrap
2854 <            : new ForkJoinTask.AdaptedRunnableAction(task));
2852 >        return poolSubmit(true, (task instanceof ForkJoinTask<?>)
2853 >                          ? (ForkJoinTask<Void>) task // avoid re-wrap
2854 >                          : new ForkJoinTask.AdaptedRunnableAction(task));
2855      }
2856  
2857 <    // Potential added methods
2857 >    // Added mainly for possible use in Loom
2858 >
2859      /**
2860       * Submits the given task without guaranteeing that it will
2861 <     * eventually execute in the absence of available active threads
2862 <     * or invocations of {@link #activateThread activateThread}. In
2863 <     * some contexts, this method may reduce contention and overhead
2864 <     * by either deferring processing or relying on context-specific
2865 <     * knowledge that existing threads (possibly including the calling
2742 <     * thread if operating in this pool) will eventually be available
2743 <     * to execute the task.
2861 >     * eventually execute in the absence of available active threads.
2862 >     * In some contexts, this method may reduce contention and
2863 >     * overhead by relying on context-specific knowledge that existing
2864 >     * threads (possibly including the calling thread if operating in
2865 >     * this pool) will eventually be available to execute the task.
2866       *
2867       * @param task the task
2868       * @param <T> the type of the task's result
2869       * @return the task
2870       */
2871 <    /*
2872 <     public <T> ForkJoinTask<T> lazySubmit(ForkJoinTask<T> task) {
2751 <        return unsignalledSubmit(task);
2871 >    public <T> ForkJoinTask<T> lazySubmit(ForkJoinTask<T> task) {
2872 >        return poolSubmit(false, task);
2873      }
2874 <    */
2874 >
2875      /**
2876 <     * If there are fewer than {@link #getParallelism getParallelism}
2877 <     * active threads in this pool, activates one to process
2878 <     * tasks. This thread may in turn activate others if discovers or
2879 <     * generates additional tasks. This method may be used in
2880 <     * conjunction with {@link #lazySubmit lazySubmit} to defer task
2881 <     * processing.
2876 >     * Changes the target parallelism of this pool, controlling the
2877 >     * future creation, use, and termination of worker threads.
2878 >     * Applications include contexts in which the number of available
2879 >     * processors changes over time.
2880 >     *
2881 >     * @param size the target parallelism level
2882 >     * @return the previous parallelism level.
2883 >     * @throws IllegalArgumentException if size is less than 1 or
2884 >     *         greater than the maximum supported by this
2885 >     *         pool (currently 32767).
2886 >     * @throws IllegalStateException if this is the{@link #commonPool()} and
2887 >     *         parallelism level was set by System property
2888 >     *         {@systemProperty java.util.concurrent.ForkJoinPool.common.parallelism}.
2889 >     * @throws SecurityException if a security manager exists and
2890 >     *         the caller is not permitted to modify threads
2891 >     *         because it does not hold {@link
2892 >     *         java.lang.RuntimePermission}{@code ("modifyThread")}
2893       */
2894 <    /*
2895 <    public void activateThread() {
2896 <        signalWork();
2894 >    public int setParallelism(int size) {
2895 >        if (size < 1 || size > MAX_CAP)
2896 >            throw new IllegalArgumentException();
2897 >        if ((config & PRESET_SIZE) != 0)
2898 >            throw new IllegalStateException("Cannot override System property");
2899 >        checkPermission();
2900 >        return getAndSetParallelism(size);
2901      }
2766    */
2902  
2903      /**
2904       * @throws NullPointerException       {@inheritDoc}
# Line 2777 | Line 2912 | public class ForkJoinPool extends Abstra
2912                  ForkJoinTask<T> f =
2913                      new ForkJoinTask.AdaptedInterruptibleCallable<T>(t);
2914                  futures.add(f);
2915 <                externalSubmit(f);
2915 >                poolSubmit(true, f);
2916              }
2917              for (int i = futures.size() - 1; i >= 0; --i)
2918 <                ((ForkJoinTask<?>)futures.get(i)).awaitPoolInvoke(this);
2918 >                ((ForkJoinTask<?>)futures.get(i)).quietlyJoin();
2919              return futures;
2920          } catch (Throwable t) {
2921              for (Future<T> e : futures)
# Line 2800 | Line 2935 | public class ForkJoinPool extends Abstra
2935                  ForkJoinTask<T> f =
2936                      new ForkJoinTask.AdaptedInterruptibleCallable<T>(t);
2937                  futures.add(f);
2938 <                externalSubmit(f);
2938 >                poolSubmit(true, f);
2939              }
2940              long startTime = System.nanoTime(), ns = nanos;
2941              boolean timedOut = (ns < 0L);
2942              for (int i = futures.size() - 1; i >= 0; --i) {
2943 <                Future<T> f = futures.get(i);
2943 >                ForkJoinTask<T> f = (ForkJoinTask<T>)futures.get(i);
2944                  if (!f.isDone()) {
2945 +                    if (!timedOut)
2946 +                        timedOut = !f.quietlyJoin(ns, TimeUnit.NANOSECONDS);
2947                      if (timedOut)
2948                          ForkJoinTask.cancelIgnoringExceptions(f);
2949 <                    else {
2950 <                        ((ForkJoinTask<T>)f).awaitPoolInvoke(this, ns);
2814 <                        if ((ns = nanos - (System.nanoTime() - startTime)) < 0L)
2815 <                            timedOut = true;
2816 <                    }
2949 >                    else
2950 >                        ns = nanos - (System.nanoTime() - startTime);
2951                  }
2952              }
2953              return futures;
# Line 2840 | Line 2974 | public class ForkJoinPool extends Abstra
2974              Throwable ex = null;
2975              boolean failed;
2976              if (c == null || Thread.interrupted() ||
2977 <                (pool != null && pool.mode < 0))
2977 >                (pool != null && pool.runState < 0))
2978                  failed = true;
2979              else if (isDone())
2980                  failed = false;
# Line 2853 | Line 2987 | public class ForkJoinPool extends Abstra
2987                      failed = true;
2988                  }
2989              }
2990 <            if ((pool != null && pool.mode < 0) ||
2990 >            if ((pool != null && pool.runState < 0) ||
2991                  (failed && count.getAndDecrement() <= 1))
2992                  trySetThrown(ex != null ? ex : new CancellationException());
2993          }
# Line 2910 | Line 3044 | public class ForkJoinPool extends Abstra
3044                      throw new NullPointerException();
3045                  InvokeAnyTask<T> f = new InvokeAnyTask<T>(root, c);
3046                  fs.add(f);
3047 <                externalSubmit(f);
3047 >                poolSubmit(true, f);
3048                  if (root.isDone())
3049                      break;
3050              }
3051 <            return root.getForPoolInvoke(this);
3051 >            return root.get();
3052          } finally {
3053              for (InvokeAnyTask<T> f : fs)
3054                  ForkJoinTask.cancelIgnoringExceptions(f);
# Line 2937 | Line 3071 | public class ForkJoinPool extends Abstra
3071                      throw new NullPointerException();
3072                  InvokeAnyTask<T> f = new InvokeAnyTask<T>(root, c);
3073                  fs.add(f);
3074 <                externalSubmit(f);
3074 >                poolSubmit(true, f);
3075                  if (root.isDone())
3076                      break;
3077              }
3078 <            return root.getForPoolInvoke(this, nanos);
3078 >            return root.get(nanos, TimeUnit.NANOSECONDS);
3079          } finally {
3080              for (InvokeAnyTask<T> f : fs)
3081                  ForkJoinTask.cancelIgnoringExceptions(f);
# Line 2973 | Line 3107 | public class ForkJoinPool extends Abstra
3107       * @return the targeted parallelism level of this pool
3108       */
3109      public int getParallelism() {
3110 <        int par = mode & SMASK;
2977 <        return (par > 0) ? par : 1;
3110 >        return Math.max(getParallelismOpaque(), 1);
3111      }
3112  
3113      /**
# Line 2984 | Line 3117 | public class ForkJoinPool extends Abstra
3117       * @since 1.8
3118       */
3119      public static int getCommonPoolParallelism() {
3120 <        return Common.parallelism;
3120 >        return common.getParallelism();
3121      }
3122  
3123      /**
# Line 2996 | Line 3129 | public class ForkJoinPool extends Abstra
3129       * @return the number of worker threads
3130       */
3131      public int getPoolSize() {
3132 <        return ((mode & SMASK) + (short)(ctl >>> TC_SHIFT));
3132 >        return (short)(ctl >>> TC_SHIFT);
3133      }
3134  
3135      /**
# Line 3006 | Line 3139 | public class ForkJoinPool extends Abstra
3139       * @return {@code true} if this pool uses async mode
3140       */
3141      public boolean getAsyncMode() {
3142 <        return (mode & FIFO) != 0;
3142 >        return (config & FIFO) != 0;
3143      }
3144  
3145      /**
# Line 3018 | Line 3151 | public class ForkJoinPool extends Abstra
3151       * @return the number of worker threads
3152       */
3153      public int getRunningThreadCount() {
3021        VarHandle.acquireFence();
3154          WorkQueue[] qs; WorkQueue q;
3155          int rc = 0;
3156 <        if ((qs = queues) != null) {
3156 >        if ((runState & TERMINATED) == 0 && (qs = queues) != null) {
3157              for (int i = 1; i < qs.length; i += 2) {
3158                  if ((q = qs[i]) != null && q.isApparentlyUnblocked())
3159                      ++rc;
# Line 3038 | Line 3170 | public class ForkJoinPool extends Abstra
3170       * @return the number of active threads
3171       */
3172      public int getActiveThreadCount() {
3173 <        int r = (mode & SMASK) + (int)(ctl >> RC_SHIFT);
3042 <        return (r <= 0) ? 0 : r; // suppress momentarily negative values
3173 >        return Math.max((short)(ctl >>> RC_SHIFT), 0);
3174      }
3175  
3176      /**
# Line 3074 | Line 3205 | public class ForkJoinPool extends Abstra
3205          if ((qs = queues) != null) {
3206              for (int i = 1; i < qs.length; i += 2) {
3207                  if ((q = qs[i]) != null)
3208 <                    count += (long)q.nsteals & 0xffffffffL;
3208 >                     count += (long)q.nsteals & 0xffffffffL;
3209              }
3210          }
3211          return count;
# Line 3091 | Line 3222 | public class ForkJoinPool extends Abstra
3222       * @return the number of queued tasks
3223       */
3224      public long getQueuedTaskCount() {
3094        VarHandle.acquireFence();
3225          WorkQueue[] qs; WorkQueue q;
3226          int count = 0;
3227 <        if ((qs = queues) != null) {
3227 >        if ((runState & TERMINATED) == 0 && (qs = queues) != null) {
3228              for (int i = 1; i < qs.length; i += 2) {
3229                  if ((q = qs[i]) != null)
3230                      count += q.queueSize();
# Line 3111 | Line 3241 | public class ForkJoinPool extends Abstra
3241       * @return the number of queued submissions
3242       */
3243      public int getQueuedSubmissionCount() {
3114        VarHandle.acquireFence();
3244          WorkQueue[] qs; WorkQueue q;
3245          int count = 0;
3246 <        if ((qs = queues) != null) {
3246 >        if ((runState & TERMINATED) == 0 && (qs = queues) != null) {
3247              for (int i = 0; i < qs.length; i += 2) {
3248                  if ((q = qs[i]) != null)
3249                      count += q.queueSize();
# Line 3130 | Line 3259 | public class ForkJoinPool extends Abstra
3259       * @return {@code true} if there are any queued submissions
3260       */
3261      public boolean hasQueuedSubmissions() {
3262 <        VarHandle.acquireFence();
3134 <        WorkQueue[] qs; WorkQueue q;
3135 <        if ((qs = queues) != null) {
3136 <            for (int i = 0; i < qs.length; i += 2) {
3137 <                if ((q = qs[i]) != null && !q.isEmpty())
3138 <                    return true;
3139 <            }
3140 <        }
3141 <        return false;
3262 >        return (runState & TERMINATED) == 0 && hasSubmissions();
3263      }
3264  
3265      /**
# Line 3187 | Line 3308 | public class ForkJoinPool extends Abstra
3308       */
3309      public String toString() {
3310          // Use a single pass through queues to collect counts
3190        int md = mode; // read volatile fields first
3191        long c = ctl;
3311          long st = stealCount;
3312          long qt = 0L, ss = 0L; int rc = 0;
3313          WorkQueue[] qs; WorkQueue q;
# Line 3208 | Line 3327 | public class ForkJoinPool extends Abstra
3327              }
3328          }
3329  
3330 <        int pc = (md & SMASK);
3331 <        int tc = pc + (short)(c >>> TC_SHIFT);
3332 <        int ac = pc + (int)(c >> RC_SHIFT);
3330 >        int pc = parallelism;
3331 >        long c = ctl;
3332 >        int tc = (short)(c >>> TC_SHIFT);
3333 >        int ac = (short)(c >>> RC_SHIFT);
3334          if (ac < 0) // ignore transient negative
3335              ac = 0;
3336 <        String level = ((md & TERMINATED) != 0 ? "Terminated" :
3337 <                        (md & STOP)       != 0 ? "Terminating" :
3338 <                        (md & SHUTDOWN)   != 0 ? "Shutting down" :
3336 >        int rs = runState;
3337 >        String level = ((rs & TERMINATED) != 0 ? "Terminated" :
3338 >                        (rs & STOP)       != 0 ? "Terminating" :
3339 >                        (rs & SHUTDOWN)   != 0 ? "Shutting down" :
3340                          "Running");
3341          return super.toString() +
3342              "[" + level +
# Line 3278 | Line 3399 | public class ForkJoinPool extends Abstra
3399       * @return {@code true} if all tasks have completed following shut down
3400       */
3401      public boolean isTerminated() {
3402 <        return (mode & TERMINATED) != 0;
3402 >        return (runState & TERMINATED) != 0;
3403      }
3404  
3405      /**
# Line 3295 | Line 3416 | public class ForkJoinPool extends Abstra
3416       * @return {@code true} if terminating but not yet terminated
3417       */
3418      public boolean isTerminating() {
3419 <        return (mode & (STOP | TERMINATED)) == STOP;
3419 >        return (runState & (STOP | TERMINATED)) == STOP;
3420      }
3421  
3422      /**
# Line 3304 | Line 3425 | public class ForkJoinPool extends Abstra
3425       * @return {@code true} if this pool has been shut down
3426       */
3427      public boolean isShutdown() {
3428 <        return (mode & SHUTDOWN) != 0;
3428 >        return runState != 0;
3429      }
3430  
3431      /**
# Line 3323 | Line 3444 | public class ForkJoinPool extends Abstra
3444       */
3445      public boolean awaitTermination(long timeout, TimeUnit unit)
3446          throws InterruptedException {
3447 <        ReentrantLock lock; Condition cond;
3447 >        ReentrantLock lock; Condition cond; boolean terminated;
3448          long nanos = unit.toNanos(timeout);
3449 <        boolean terminated = false;
3450 <        if ((mode & UNSTOPPABLE) != 0) {
3330 <            Thread t; ForkJoinWorkerThread wt; int q;
3331 <            if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread &&
3332 <                (wt = (ForkJoinWorkerThread)t).pool == this)
3333 <                q = helpQuiescePool(wt.workQueue, nanos, true);
3334 <            else
3335 <                q = externalHelpQuiescePool(nanos, true);
3336 <            if (q < 0)
3449 >        if ((config & ISCOMMON) != 0) {
3450 >            if (helpQuiescePool(this, nanos, true) < 0)
3451                  throw new InterruptedException();
3452 +            terminated = false;
3453          }
3454 <        else if (!(terminated = tryTerminate(false, false)) &&
3455 <                 (lock = registrationLock) != null) {
3456 <            lock.lock();
3457 <            try {
3458 <                if ((cond = termination) == null)
3459 <                    termination = cond = lock.newCondition();
3460 <                while (!(terminated = ((mode & TERMINATED) != 0)) && nanos > 0L)
3461 <                    nanos = cond.awaitNanos(nanos);
3462 <            } finally {
3463 <                lock.unlock();
3454 >        else if (!(terminated = ((runState & TERMINATED) != 0))) {
3455 >            tryTerminate(false, false); // reduce transient blocking
3456 >            if ((lock = registrationLock) != null &&
3457 >                !(terminated = (((runState & TERMINATED) != 0)))) {
3458 >                lock.lock();
3459 >                try {
3460 >                    if ((cond = termination) == null)
3461 >                        termination = cond = lock.newCondition();
3462 >                    while (!(terminated = ((runState & TERMINATED) != 0)) &&
3463 >                           nanos > 0L)
3464 >                        nanos = cond.awaitNanos(nanos);
3465 >                } finally {
3466 >                    lock.unlock();
3467 >                }
3468              }
3469          }
3470          return terminated;
# Line 3363 | Line 3482 | public class ForkJoinPool extends Abstra
3482       * timeout elapsed.
3483       */
3484      public boolean awaitQuiescence(long timeout, TimeUnit unit) {
3485 <        Thread t; ForkJoinWorkerThread wt; int q;
3367 <        long nanos = unit.toNanos(timeout);
3368 <        if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread &&
3369 <            (wt = (ForkJoinWorkerThread)t).pool == this)
3370 <            q = helpQuiescePool(wt.workQueue, nanos, false);
3371 <        else
3372 <            q = externalHelpQuiescePool(nanos, false);
3373 <        return (q > 0);
3485 >        return (helpQuiescePool(this, unit.toNanos(timeout), false) > 0);
3486      }
3487  
3488      /**
# Line 3494 | Line 3606 | public class ForkJoinPool extends Abstra
3606              long c = ctl;
3607              if (blocker.isReleasable())
3608                  break;
3609 <            if ((comp = tryCompensate(c)) >= 0) {
3609 >            if ((comp = tryCompensate(c, false)) >= 0) {
3610                  long post = (comp == 0) ? 0L : RC_UNIT;
3611                  try {
3612                      done = blocker.block();
# Line 3528 | Line 3640 | public class ForkJoinPool extends Abstra
3640          return new ForkJoinTask.AdaptedCallable<T>(callable);
3641      }
3642  
3531
3643      static {
3644 <        Field pids;
3644 >        U = Unsafe.getUnsafe();
3645 >        Class<ForkJoinPool> klass = ForkJoinPool.class;
3646          try {
3647 <            pids = ForkJoinPool.class.getDeclaredField("poolIds");
3647 >            POOLIDS = U.staticFieldOffset(klass.getDeclaredField("poolIds"));
3648          } catch (NoSuchFieldException e) {
3649              throw new ExceptionInInitializerError(e);
3650          }
3651 <        U = Unsafe.getUnsafe();
3652 <        POOLIDS = U.staticFieldOffset(pids);
3653 <        CTL = U.objectFieldOffset(ForkJoinPool.class, "ctl");
3654 <        MODE = U.objectFieldOffset(ForkJoinPool.class, "mode");
3543 <        THREADIDS = U.objectFieldOffset(ForkJoinPool.class, "threadIds");
3651 >        CTL = U.objectFieldOffset(klass, "ctl");
3652 >        RUNSTATE = U.objectFieldOffset(klass, "runState");
3653 >        PARALLELISM = U.objectFieldOffset(klass, "parallelism");
3654 >        THREADIDS = U.objectFieldOffset(klass, "threadIds");
3655  
3545        modifyThreadPermission = new RuntimePermission("modifyThread");
3656          defaultForkJoinWorkerThreadFactory =
3657              new DefaultForkJoinWorkerThreadFactory();
3658 <        // Reduce the risk of rare disastrous classloading in first call to
3659 <        // LockSupport.park: https://bugs.openjdk.java.net/browse/JDK-8074773
3660 <        Class<?> ensureLoaded = LockSupport.class;
3658 >        @SuppressWarnings("removal")
3659 >        ForkJoinPool p = common = (System.getSecurityManager() == null) ?
3660 >            new ForkJoinPool((byte)0) :
3661 >            AccessController.doPrivileged(new PrivilegedAction<>() {
3662 >                    public ForkJoinPool run() {
3663 >                        return new ForkJoinPool((byte)0); }});
3664 >        Class<?> dep = LockSupport.class; // ensure loaded
3665      }
3666   }

Diff Legend

- Removed lines
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)