
Comparing jsr166/src/jsr166y/LinkedTransferQueue.java (file contents):
Revision 1.49 by jsr166, Thu Oct 22 15:58:44 2009 UTC vs.
Revision 1.50 by dl, Sat Oct 24 12:29:57 2009 UTC

# Line 161 | Line 161 | public class LinkedTransferQueue<E> exte
161       * targets.  Even when using very small slack values, this
162       * approach works well for dual queues because it allows all
163       * operations up to the point of matching or appending an item
164 <     * (hence potentially releasing another thread) to be read-only,
165 <     * thus not introducing any further contention. As described
166 <     * below, we implement this by performing slack maintenance
167 <     * retries only after these points.
164 >     * (hence potentially allowing progress by another thread) to be
165 >     * read-only, thus not introducing any further contention. As
166 >     * described below, we implement this by performing slack
167 >     * maintenance retries only after these points.
168       *
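The read-only property described above can be made concrete with a small stand-alone sketch (simplified and hypothetical; not the code in this file): the only write on a consumer's hot path is the single CAS that matches an item, and any head maintenance is attempted only after that CAS has succeeded.

    import java.util.concurrent.atomic.AtomicReference;

    // Illustrative sketch only: the sole write on the consumer's hot path is
    // the CAS that matches an item; traversal before that point is read-only,
    // so it adds no contention on the head pointer.
    final class MatchThenMaintain {
        static final class Node {
            final AtomicReference<Object> item;        // holds data; null once matched
            volatile Node next;
            Node(Object x) { item = new AtomicReference<>(x); }
        }

        private final AtomicReference<Node> head =
            new AtomicReference<>(new Node(null));     // sentinel

        Object poll() {
            Node h = head.get();
            for (Node p = h; p != null; p = p.next) {
                Object x = p.item.get();
                if (x != null && p.item.compareAndSet(x, null)) {
                    // matched; only now attempt head maintenance (a real
                    // implementation would apply a slack threshold here)
                    if (p != h)
                        head.compareAndSet(h, p);      // best effort; failure is fine
                    return x;
                }
            }
            return null;                               // no unmatched data node
        }
    }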
169       * As an accompaniment to such techniques, traversal overhead can
170       * be further reduced without increasing contention of head
171 <     * pointer updates.  During traversals, threads may sometimes
172 <     * shortcut the "next" link path from the current "head" node to
173 <     * be closer to the currently known first unmatched node. Again,
174 <     * this may be triggered using thresholds or randomization.
171 >     * pointer updates: Threads may sometimes shortcut the "next" link
172 >     * path from the current "head" node to be closer to the currently
173 >     * known first unmatched node, and similarly for tail. Again, this
174 >     * may be triggered using thresholds or randomization.
175       *
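A sketch of the shortcut idea (hypothetical names, simplified node type; not this file's implementation): rather than CASing head itself, a traversing thread re-points head's "next" link at the first unmatched node it has found, so later traversals skip the intervening matched nodes.

    import java.util.concurrent.atomic.AtomicReference;

    // Illustrative sketch: shorten the path from h without moving h itself.
    final class ShortcutSketch {
        static final class Node {
            volatile Object item;                           // null once matched
            final AtomicReference<Node> next = new AtomicReference<>();
        }

        /** Best effort: relink h.next toward the first unmatched node after h. */
        static void shortcut(Node h) {
            Node hn = h.next.get(), p = hn;
            while (p != null && p.item == null && p.next.get() != null)
                p = p.next.get();                           // skip matched nodes (read-only)
            if (p != null && p != hn)
                h.next.compareAndSet(hn, p);                // single CAS; failure is fine
        }
    }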
176       * These ideas must be further extended to avoid unbounded amounts
177       * of costly-to-reclaim garbage caused by the sequential "next"
# Line 199 | Line 199 | public class LinkedTransferQueue<E> exte
199       * mechanics because an update may leave head at a detached node.
200       * And while direct writes are possible for tail updates, they
201       * increase the risk of long retraversals, and hence long garbage
202 <     * chains which can be much more costly than is worthwhile
202 >     * chains, which can be much more costly than is worthwhile
203       * considering that the cost difference of performing a CAS vs
204       * write is smaller when they are not triggered on each operation
205       * (especially considering that writes and CASes equally require
# Line 207 | Line 207 | public class LinkedTransferQueue<E> exte
207       * more costly than the writes themselves because of contention).
208       *
209       * Removal of interior nodes (due to timed out or interrupted
210 <     * waits, or calls to remove or Iterator.remove) uses a scheme
211 <     * roughly similar to that in Scherer, Lea, and Scott's
212 <     * SynchronousQueue. Given a predecessor, we can unsplice any node
213 <     * except the (actual) tail of the queue. To avoid build-up of
214 <     * cancelled trailing nodes, upon a request to remove a trailing
215 <     * node, it is placed in field "cleanMe" to be unspliced upon the
216 <     * next call to unsplice any other node.  Situations needing such
217 <     * mechanics are not common but do occur in practice; for example
218 <     * when an unbounded series of short timed calls to poll
219 <     * repeatedly time out but never otherwise fall off the list
220 <     * because of an untimed call to take at the front of the
221 <     * queue. (Note that maintaining field cleanMe does not otherwise
222 <     * much impact garbage retention even if never cleared by some
223 <     * other call because the held node will eventually either
224 <     * directly or indirectly lead to a self-link once off the list.)
210 >     * waits, or calls to remove(x) or Iterator.remove) can use a
211 >     * scheme roughly similar to that described in Scherer, Lea, and
212 >     * Scott's SynchronousQueue. Given a predecessor, we can unsplice
213 >     * any node except the (actual) tail of the queue. To avoid
214 >     * build-up of cancelled trailing nodes, upon a request to remove
215 >     * a trailing node, it is placed in field "cleanMe" to be
216 >     * unspliced upon the next call to unsplice any other node.
217 >     * Situations needing such mechanics are not common but do occur
218 >     * in practice; for example when an unbounded series of short
219 >     * timed calls to poll repeatedly time out but never otherwise
220 >     * fall off the list because of an untimed call to take at the
221 >     * front of the queue. Note that maintaining field cleanMe does
222 >     * not otherwise much impact garbage retention even if never
223 >     * cleared by some other call because the held node will
224 >     * eventually either directly or indirectly lead to a self-link
225 >     * once off the list.
226       *
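The unsplice/cleanMe mechanics can be sketched as follows (an illustrative simplification with hypothetical names; a real implementation would also re-check that the saved node is still cancelled): an interior node is unlinked with one CAS on its predecessor's next link, while a trailing node's predecessor is parked in cleanMe so the removal can be retried on a later call.

    import java.util.concurrent.atomic.AtomicReference;

    // Illustrative sketch: interior removal is one CAS; trailing removal is
    // deferred via a "cleanMe" slot and retried by a later unsplice call.
    final class UnspliceSketch {
        static final class Node {
            volatile Object item;                      // null once cancelled/matched
            final AtomicReference<Node> next = new AtomicReference<>();
        }

        private final AtomicReference<Node> cleanMe = new AtomicReference<>();

        void unsplice(Node pred, Node s) {
            Node n = s.next.get();
            if (n != null) {
                pred.next.compareAndSet(s, n);         // interior: one CAS unlinks s
            } else {
                // s is currently the last node and cannot be unlinked yet;
                // remember its predecessor and retry an earlier saved removal.
                Node saved = cleanMe.getAndSet(pred);
                if (saved != null)
                    tryCleanSaved(saved);
            }
        }

        // Best effort: if the node after the saved predecessor is no longer
        // trailing (a real implementation would also confirm it is still
        // cancelled), unlink it now.
        private void tryCleanSaved(Node p) {
            Node s = p.next.get();
            Node n = (s != null) ? s.next.get() : null;
            if (n != null)
                p.next.compareAndSet(s, n);
        }
    }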
227       * *** Overview of implementation ***
228       *
229 <     * We use a threshold-based approach to updates, with a target
230 <     * slack of two.  The slack value is hard-wired: a path greater
229 >     * We use a threshold-based approach to updates, with a slack
230 >     * threshold of two -- that is, we update head/tail when the
231 >     * current pointer appears to be two or more steps away from the
232 >     * first/last node. The slack value is hard-wired: a path greater
233       * than one is naturally implemented by checking equality of
234       * traversal pointers except when the list has only one element,
235 <     * in which case we keep target slack at one. Avoiding tracking
235 >     * in which case we keep slack threshold at one. Avoiding tracking
236       * explicit counts across method calls slightly simplifies an
237       * already-messy implementation. Using randomization would
238       * probably work better if there were a low-quality dirt-cheap
239       * per-thread one available, but even ThreadLocalRandom is too
240       * heavy for these purposes.
241       *
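The hard-wired threshold can be expressed as a one-line test (an illustrative helper, not present in this file): "trails by two or more steps" reduces to two pointer-equality checks made during traversal, so no counts need to be carried across method calls.

    // Illustrative helper: slack-of-two detected purely by pointer equality.
    final class SlackCheckSketch {
        static final class Node { volatile Node next; }

        /** True if target is at least two links beyond ptr. */
        static boolean slackAtLeastTwo(Node ptr, Node target) {
            return target != ptr && target != ptr.next;
        }
    }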
242 <     * With such a small target slack value, it is rarely worthwhile
243 <     * to augment this with path short-circuiting; i.e., unsplicing
244 <     * nodes between head and the first unmatched node, or similarly
245 <     * for tail, rather than advancing head or tail proper. However,
246 <     * it is used (in awaitMatch) immediately before a waiting thread
247 <     * starts to block, as a final bit of helping at a point when
248 <     * contention with others is extremely unlikely (since if other
249 <     * threads that could release it are operating, then the current
250 <     * thread wouldn't be blocking).
242 >     * With such a small slack threshold value, it is rarely
243 >     * worthwhile to augment this with path short-circuiting; i.e.,
244 >     * unsplicing nodes between head and the first unmatched node, or
245 >     * similarly for tail, rather than advancing head or tail
246 >     * proper. However, it is used (in awaitMatch) immediately before
247 >     * a waiting thread starts to block, as a final bit of helping at
248 >     * a point when contention with others is extremely unlikely
249 >     * (since if other threads that could release it are operating,
250 >     * then the current thread wouldn't be blocking).
251       *
252       * We allow both the head and tail fields to be null before any
253       * nodes are enqueued; initializing upon first append.  This
# Line 260 | Line 263 | public class LinkedTransferQueue<E> exte
263       * of offer, put, poll, take, or transfer (each possibly with
264       * timeout). The relative complexity of using one monolithic
265       * method outweighs the code bulk and maintenance problems of
266 <     * using nine separate methods.
266 >     * using separate methods for each case.
267       *
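As a sketch of that shape (hypothetical signatures; the body of xfer is deliberately elided, only the dispatch pattern is the point): every public entry point reduces to one internal call that states whether the caller brings data and how it is willing to wait.

    import java.util.concurrent.TimeUnit;

    // Illustrative sketch: one monolithic transfer method behind thin wrappers.
    abstract class XferDispatchSketch<E> {
        static final int NOW = 0, ASYNC = 1, SYNC = 2, TIMEOUT = 3;

        /** Single implementation shared by all public methods (not shown). */
        abstract E xfer(E e, boolean haveData, int how, long nanos);

        public boolean offer(E e) { xfer(e, true, ASYNC, 0); return true; }  // unbounded
        public void put(E e)      { xfer(e, true, ASYNC, 0); }
        public E poll()           { return xfer(null, false, NOW, 0); }
        public E take() throws InterruptedException {
            return xfer(null, false, SYNC, 0);
        }
        public void transfer(E e) throws InterruptedException {
            xfer(e, true, SYNC, 0);
        }
        public E poll(long timeout, TimeUnit unit) throws InterruptedException {
            return xfer(null, false, TIMEOUT, unit.toNanos(timeout));
        }
    }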
268       * Operation consists of up to three phases. The first is
269       * implemented within method xfer, the second in tryAppend, and
# Line 285 | Line 288 | public class LinkedTransferQueue<E> exte
288       *
289       * 2. Try to append a new node (method tryAppend)
290       *
291 <     *    Starting at current tail pointer, try to append a new node
292 <     *    to the list (or if head was null, establish the first
293 <     *    node). Nodes can be appended only if their predecessors are
294 <     *    either already matched or are of the same mode. If we detect
295 <     *    otherwise, then a new node with opposite mode must have been
296 <     *    appended during traversal, so must restart at phase 1. The
297 <     *    traversal and update steps are otherwise similar to phase 1:
298 <     *    Retrying upon CAS misses and checking for staleness.  In
299 <     *    particular, if a self-link is encountered, then we can
300 <     *    safely jump to a node on the list by continuing the
301 <     *    traversal at current head.
291 >     *    Starting at current tail pointer, find the actual last node
292 >     *    and try to append a new node (or if head was null, establish
293 >     *    the first node). Nodes can be appended only if their
294 >     *    predecessors are either already matched or are of the same
295 >     *    mode. If we detect otherwise, then a new node with opposite
296 >     *    mode must have been appended during traversal, so we must
297 >     *    restart at phase 1. The traversal and update steps are
298 >     *    otherwise similar to phase 1: Retrying upon CAS misses and
299 >     *    checking for staleness.  In particular, if a self-link is
300 >     *    encountered, then we can safely jump to a node on the list
301 >     *    by continuing the traversal at current head.
302       *
303       *    On successful append, if the call was ASYNC, return.
304       *
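A simplified, hypothetical sketch of the append phase (not this file's tryAppend): appending is permitted only after a matched or same-mode predecessor, an unmatched opposite-mode last node forces a restart of phase 1, and a self-linked node sends the traversal back to the current head.

    import java.util.concurrent.atomic.AtomicReference;

    // Illustrative sketch of append-only-after-compatible-predecessor.
    final class AppendSketch {
        static final class Node {
            final boolean isData;                      // true = data, false = request
            volatile Object item;
            final AtomicReference<Node> next = new AtomicReference<>();
            Node(Object x, boolean isData) { this.item = x; this.isData = isData; }
            boolean isMatched() { return (item != null) != isData; }
        }

        final AtomicReference<Node> head = new AtomicReference<>();
        final AtomicReference<Node> tail = new AtomicReference<>();

        /** Appends s after the true last node; returns s, or null to restart. */
        Node tryAppend(Node s) {
            for (Node t = tail.get(), p = (t != null) ? t : head.get(); ; ) {
                if (p == null) {                       // empty: establish first node
                    if (head.compareAndSet(null, s)) return s;
                    p = head.get();
                    continue;
                }
                if (!p.isMatched() && p.isData != s.isData)
                    return null;                       // unmatched opposite mode: restart
                Node n = p.next.get();
                if (n == p)
                    p = head.get();                    // self-link: rejoin list at head
                else if (n != null)
                    p = n;                             // not the last node yet; keep walking
                else if (!p.next.compareAndSet(null, s))
                    p = p.next.get();                  // lost append race; re-read successor
                else {
                    if (p != t)                        // tail lagged: best-effort catch-up
                        tail.compareAndSet(t, s);
                    return s;
                }
            }
        }
    }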
305       * 3. Await match or cancellation (method awaitMatch)
306       *
307       *    Wait for another thread to match node; instead cancelling if
308 <     *    current thread was interrupted or the wait timed out. On
308 >     *    the current thread was interrupted or the wait timed out. On
309       *    multiprocessors, we use front-of-queue spinning: If a node
310       *    appears to be the first unmatched node in the queue, it
311       *    spins a bit before blocking. In either case, before blocking
# Line 317 | Line 320 | public class LinkedTransferQueue<E> exte
320       *    to decide to occasionally perform a Thread.yield. While
321       * yield has underdefined specs, we assume that it might help,
322       *    and will not hurt in limiting impact of spinning on busy
323 <     *    systems.  We also use much smaller (1/4) spins for nodes
324 <     *    that are not known to be front but whose predecessors have
325 <     *    not blocked -- these "chained" spins avoid artifacts of
323 >     *    systems.  We also use smaller (1/2) spins for nodes that are
324 >     *    not known to be front but whose predecessors have not
325 >     *    blocked -- these "chained" spins avoid artifacts of
326       *    front-of-queue rules which otherwise lead to alternating
327       *    nodes spinning vs blocking. Further, front threads that
328       *    represent phase changes (from data to request node or vice
329       *    versa) compared to their predecessors receive additional
330 <     *    spins, reflecting the longer code path lengths necessary to
331 <     *    release them under contention.
330 >     *    chained spins, reflecting longer paths typically required to
331 >     *    unblock threads during phase changes.
332       */
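A minimal sketch of the spin-then-block shape described above (hypothetical helper; the constants mirror those declared below, but the waiting logic is simplified): a node believed to be at or near the front gets a larger spin budget, the spinner yields occasionally at random, and once the budget is exhausted the thread parks, re-checking for a match after each wakeup.

    import java.util.concurrent.ThreadLocalRandom;
    import java.util.concurrent.atomic.AtomicBoolean;
    import java.util.concurrent.locks.LockSupport;

    // Illustrative sketch of front-of-queue vs chained spinning before blocking.
    final class SpinThenBlockSketch {
        static final int FRONT_SPINS   = 1 << 7;
        static final int CHAINED_SPINS = FRONT_SPINS >>> 1;

        static void awaitMatch(AtomicBoolean matched, boolean probablyFront) {
            int spins = probablyFront ? FRONT_SPINS : CHAINED_SPINS;
            ThreadLocalRandom rnd = ThreadLocalRandom.current();
            while (!matched.get()) {
                if (spins > 0) {
                    --spins;
                    if (rnd.nextInt(CHAINED_SPINS) == 0)
                        Thread.yield();                 // occasional yield while spinning
                } else {
                    LockSupport.parkNanos(1_000_000L);  // block briefly, then re-check
                }
            }
        }
    }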
333  
334      /** True if on multiprocessor */
# Line 333 | Line 336 | public class LinkedTransferQueue<E> exte
336          Runtime.getRuntime().availableProcessors() > 1;
337  
338      /**
339 <     * The number of times to spin (with on average one randomly
340 <     * interspersed call to Thread.yield) on multiprocessor before
341 <     * blocking when a node is apparently the first waiter in the
342 <     * queue.  See above for explanation. Must be a power of two. The
343 <     * value is empirically derived -- it works pretty well across a
344 <     * variety of processors, numbers of CPUs, and OSes.
339 >     * The number of times to spin (with randomly interspersed calls
340 >     * to Thread.yield) on multiprocessor before blocking when a node
341 >     * is apparently the first waiter in the queue.  See above for
342 >     * explanation. Must be a power of two. The value is empirically
343 >     * derived -- it works pretty well across a variety of processors,
344 >     * numbers of CPUs, and OSes.
345       */
346      private static final int FRONT_SPINS   = 1 << 7;
347  
348      /**
349       * The number of times to spin before blocking when a node is
350 <     * preceded by another node that is apparently spinning.
350 >     * preceded by another node that is apparently spinning.  Also
351 >     * serves as an increment to FRONT_SPINS on phase changes, and as
352 >     * base average frequency for yielding during spins. Must be a
353 >     * power of two.
354       */
355 <    private static final int CHAINED_SPINS = FRONT_SPINS >>> 2;
355 >    private static final int CHAINED_SPINS = FRONT_SPINS >>> 1;
356  
357      /**
358       * Queue nodes. Uses Object, not E, for items to allow forgetting
# Line 524 | Line 530 | public class LinkedTransferQueue<E> exte
530                  if (pred == null)
531                      continue retry;           // lost race vs opposite mode
532                  if (how >= SYNC)
533 <                    return awaitMatch(pred, s, e, how, nanos);
533 >                    return awaitMatch(s, pred, e, how, nanos);
534              }
535              return e; // not waiting
536          }
# Line 568 | Line 574 | public class LinkedTransferQueue<E> exte
574      /**
575       * Spins/yields/blocks until node s is matched or caller gives up.
576       *
571     * @param pred the predecessor of s, or s or null if none
577       * @param s the waiting node
578 +     * @param pred the predecessor of s, or s itself if it has no
579 +     * predecessor, or null if unknown (the null case does not occur
580 +     * in any current calls but may in possible future extensions)
581       * @param e the comparison value for checking match
582       * @param how either SYNC or TIMEOUT
583       * @param nanos timeout value
584       * @return matched item, or e if unmatched on interrupt or timeout
585       */
586 <    private Object awaitMatch(Node pred, Node s, Object e,
586 >    private Object awaitMatch(Node s, Node pred, Object e,
587                                int how, long nanos) {
588          long lastTime = (how == TIMEOUT) ? System.nanoTime() : 0L;
589          Thread w = Thread.currentThread();
# Line 598 | Line 606 | public class LinkedTransferQueue<E> exte
606                  if ((spins = spinsFor(pred, s.isData)) > 0)
607                      randomYields = ThreadLocalRandom.current();
608              }
609 <            else if (spins > 0) {             // spin, occasionally yield
610 <                if (randomYields.nextInt(FRONT_SPINS) == 0)
611 <                    Thread.yield();
612 <                --spins;
609 >            else if (spins > 0) {             // spin
610 >                if (--spins == 0)
611 >                    shortenHeadPath();        // reduce slack before blocking
612 >                else if (randomYields.nextInt(CHAINED_SPINS) == 0)
613 >                    Thread.yield();           // occasionally yield
614              }
615              else if (s.waiter == null) {
607                shortenHeadPath();            // reduce slack before blocking
616                  s.waiter = w;                 // request unpark
617              }
618              else if (how == TIMEOUT) {
# Line 626 | Line 634 | public class LinkedTransferQueue<E> exte
634       */
635      private static int spinsFor(Node pred, boolean haveData) {
636          if (MP && pred != null) {
637 <            boolean predData = pred.isData;
638 <            if (predData != haveData)         // front and phase change
639 <                return FRONT_SPINS + (FRONT_SPINS >>> 1);
632 <            if (predData != (pred.item != null)) // probably at front
637 >            if (pred.isData != haveData)      // phase change
638 >                return FRONT_SPINS + CHAINED_SPINS;
639 >            if (pred.isMatched())             // probably at front
640                  return FRONT_SPINS;
641              if (pred.waiter == null)          // pred apparently spinning
642                  return CHAINED_SPINS;
