--- jsr166/src/jsr166y/LinkedTransferQueue.java 2009/10/22 15:58:44 1.49 +++ jsr166/src/jsr166y/LinkedTransferQueue.java 2009/11/02 15:32:00 1.65 @@ -161,17 +161,17 @@ public class LinkedTransferQueue exte * targets. Even when using very small slack values, this * approach works well for dual queues because it allows all * operations up to the point of matching or appending an item - * (hence potentially releasing another thread) to be read-only, - * thus not introducing any further contention. As described - * below, we implement this by performing slack maintenance - * retries only after these points. + * (hence potentially allowing progress by another thread) to be + * read-only, thus not introducing any further contention. As + * described below, we implement this by performing slack + * maintenance retries only after these points. * * As an accompaniment to such techniques, traversal overhead can * be further reduced without increasing contention of head - * pointer updates. During traversals, threads may sometimes - * shortcut the "next" link path from the current "head" node to - * be closer to the currently known first unmatched node. Again, - * this may be triggered with using thresholds or randomization. + * pointer updates: Threads may sometimes shortcut the "next" link + * path from the current "head" node to be closer to the currently + * known first unmatched node, and similarly for tail. Again, this + * may be triggered using thresholds or randomization. * * These ideas must be further extended to avoid unbounded amounts * of costly-to-reclaim garbage caused by the sequential "next" @@ -199,7 +199,7 @@ public class LinkedTransferQueue exte * mechanics because an update may leave head at a detached node. 
* And while direct writes are possible for tail updates, they * increase the risk of long retraversals, and hence long garbage - * chains which can be much more costly than is worthwhile + * chains, which can be much more costly than is worthwhile * considering that the cost difference of performing a CAS vs * write is smaller when they are not triggered on each operation * (especially considering that writes and CASes equally require @@ -207,44 +207,47 @@ public class LinkedTransferQueue exte * more costly than the writes themselves because of contention). * * Removal of interior nodes (due to timed out or interrupted - * waits, or calls to remove or Iterator.remove) uses a scheme - * roughly similar to that in Scherer, Lea, and Scott's - * SynchronousQueue. Given a predecessor, we can unsplice any node - * except the (actual) tail of the queue. To avoid build-up of - * cancelled trailing nodes, upon a request to remove a trailing - * node, it is placed in field "cleanMe" to be unspliced upon the - * next call to unsplice any other node. Situations needing such - * mechanics are not common but do occur in practice; for example - * when an unbounded series of short timed calls to poll - * repeatedly time out but never otherwise fall off the list - * because of an untimed call to take at the front of the - * queue. (Note that maintaining field cleanMe does not otherwise - * much impact garbage retention even if never cleared by some - * other call because the held node will eventually either - * directly or indirectly lead to a self-link once off the list.) + * waits, or calls to remove(x) or Iterator.remove) can use a + * scheme roughly similar to that described in Scherer, Lea, and + * Scott's SynchronousQueue. Given a predecessor, we can unsplice + * any node except the (actual) tail of the queue. 
To avoid + * build-up of cancelled trailing nodes, upon a request to remove + * a trailing node, it is placed in field "cleanMe" to be + * unspliced upon the next call to unsplice any other node. + * Situations needing such mechanics are not common but do occur + * in practice; for example when an unbounded series of short + * timed calls to poll repeatedly time out but never otherwise + * fall off the list because of an untimed call to take at the + * front of the queue. Note that maintaining field cleanMe does + * not otherwise much impact garbage retention even if never + * cleared by some other call because the held node will + * eventually either directly or indirectly lead to a self-link + * once off the list. * * *** Overview of implementation *** * - * We use a threshold-based approach to updates, with a target - * slack of two. The slack value is hard-wired: a path greater + * We use a threshold-based approach to updates, with a slack + * threshold of two -- that is, we update head/tail when the + * current pointer appears to be two or more steps away from the + * first/last node. The slack value is hard-wired: a path greater * than one is naturally implemented by checking equality of * traversal pointers except when the list has only one element, - * in which case we keep target slack at one. Avoiding tracking + * in which case we keep slack threshold at one. Avoiding tracking * explicit counts across method calls slightly simplifies an * already-messy implementation. Using randomization would * probably work better if there were a low-quality dirt-cheap * per-thread one available, but even ThreadLocalRandom is too * heavy for these purposes. * - * With such a small target slack value, it is rarely worthwhile - * to augment this with path short-circuiting; i.e., unsplicing - * nodes between head and the first unmatched node, or similarly - * for tail, rather than advancing head or tail proper. 
However, - * it is used (in awaitMatch) immediately before a waiting thread - * starts to block, as a final bit of helping at a point when - * contention with others is extremely unlikely (since if other - * threads that could release it are operating, then the current - * thread wouldn't be blocking). + * With such a small slack threshold value, it is rarely + * worthwhile to augment this with path short-circuiting; i.e., + * unsplicing nodes between head and the first unmatched node, or + * similarly for tail, rather than advancing head or tail + * proper. However, it is used (in awaitMatch) immediately before + * a waiting thread starts to block, as a final bit of helping at + * a point when contention with others is extremely unlikely + * (since if other threads that could release it are operating, + * then the current thread wouldn't be blocking). * * We allow both the head and tail fields to be null before any * nodes are enqueued; initializing upon first append. This @@ -260,7 +263,7 @@ public class LinkedTransferQueue exte * of offer, put, poll, take, or transfer (each possibly with * timeout). The relative complexity of using one monolithic * method outweighs the code bulk and maintenance problems of - * using nine separate methods. + * using separate methods for each case. * * Operation consists of up to three phases. The first is * implemented within method xfer, the second in tryAppend, and @@ -285,24 +288,24 @@ public class LinkedTransferQueue exte * * 2. Try to append a new node (method tryAppend) * - * Starting at current tail pointer, try to append a new node - * to the list (or if head was null, establish the first - * node). Nodes can be appended only if their predecessors are - * either already matched or are of the same mode. If we detect - * otherwise, then a new node with opposite mode must have been - * appended during traversal, so must restart at phase 1. 
The - * traversal and update steps are otherwise similar to phase 1: - * Retrying upon CAS misses and checking for staleness. In - * particular, if a self-link is encountered, then we can - * safely jump to a node on the list by continuing the - * traversal at current head. + * Starting at current tail pointer, find the actual last node + * and try to append a new node (or if head was null, establish + * the first node). Nodes can be appended only if their + * predecessors are either already matched or are of the same + * mode. If we detect otherwise, then a new node with opposite + * mode must have been appended during traversal, so we must + * restart at phase 1. The traversal and update steps are + * otherwise similar to phase 1: Retrying upon CAS misses and + * checking for staleness. In particular, if a self-link is + * encountered, then we can safely jump to a node on the list + * by continuing the traversal at current head. * * On successful append, if the call was ASYNC, return. * * 3. Await match or cancellation (method awaitMatch) * * Wait for another thread to match node; instead cancelling if - * current thread was interrupted or the wait timed out. On + * the current thread was interrupted or the wait timed out. On * multiprocessors, we use front-of-queue spinning: If a node * appears to be the first unmatched node in the queue, it * spins a bit before blocking. In either case, before blocking @@ -317,15 +320,15 @@ public class LinkedTransferQueue exte * to decide to occasionally perform a Thread.yield. While * yield has underdefined specs, we assume that it might help, * and will not hurt in limiting impact of spinning on busy - * systems. 
We also use smaller (1/2) spins for nodes that are + * not known to be front but whose predecessors have not + * blocked -- these "chained" spins avoid artifacts of * front-of-queue rules which otherwise lead to alternating * nodes spinning vs blocking. Further, front threads that * represent phase changes (from data to request node or vice * versa) compared to their predecessors receive additional - * spins, reflecting the longer code path lengths necessary to - * release them under contention. + * chained spins, reflecting longer paths typically required to + * unblock threads during phase changes. */ /** True if on multiprocessor */ @@ -333,20 +336,23 @@ public class LinkedTransferQueue exte Runtime.getRuntime().availableProcessors() > 1; /** - * The number of times to spin (with on average one randomly - * interspersed call to Thread.yield) on multiprocessor before - * blocking when a node is apparently the first waiter in the - * queue. See above for explanation. Must be a power of two. The - * value is empirically derived -- it works pretty well across a - * variety of processors, numbers of CPUs, and OSes. + * The number of times to spin (with randomly interspersed calls + * to Thread.yield) on multiprocessor before blocking when a node + * is apparently the first waiter in the queue. See above for + * explanation. Must be a power of two. The value is empirically + * derived -- it works pretty well across a variety of processors, + * numbers of CPUs, and OSes. */ private static final int FRONT_SPINS = 1 << 7; /** * The number of times to spin before blocking when a node is - * preceded by another node that is apparently spinning. + * preceded by another node that is apparently spinning. Also + * serves as an increment to FRONT_SPINS on phase changes, and as + * base average frequency for yielding during spins. Must be a + * power of two. 
*/ - private static final int CHAINED_SPINS = FRONT_SPINS >>> 2; + private static final int CHAINED_SPINS = FRONT_SPINS >>> 1; /** * Queue nodes. Uses Object, not E, for items to allow forgetting @@ -367,6 +373,7 @@ public class LinkedTransferQueue exte } final boolean casItem(Object cmp, Object val) { + assert cmp == null || cmp.getClass() != Node.class; return UNSAFE.compareAndSwapObject(this, itemOffset, cmp, val); } @@ -403,7 +410,14 @@ public class LinkedTransferQueue exte */ final boolean isMatched() { Object x = item; - return x == this || (x != null) != isData; + return (x == this) || ((x == null) == isData); + } + + /** + * Returns true if this is an unmatched request node. + */ + final boolean isUnmatchedRequest() { + return !isData && item == null; } /** @@ -421,6 +435,7 @@ public class LinkedTransferQueue exte * Tries to artificially match a data node -- used by remove. */ final boolean tryMatchData() { + assert isData; Object x = item; if (x != null && x != this && casItem(x, null)) { LockSupport.unpark(waiter); @@ -442,10 +457,10 @@ public class LinkedTransferQueue exte } /** head of the queue; null until first enqueue */ - private transient volatile Node head; + transient volatile Node head; /** predecessor of dangling unspliceable node */ - private transient volatile Node cleanMe; // decl here to reduce contention + private transient volatile Node cleanMe; // decl here reduces contention /** tail of the queue; null until first append */ private transient volatile Node tail; @@ -464,25 +479,30 @@ public class LinkedTransferQueue exte } /* - * Possible values for "how" argument in xfer method. Beware that - * the order of assigned numerical values matters. + * Possible values for "how" argument in xfer method. 
*/ - private static final int NOW = 0; // for untimed poll, tryTransfer - private static final int ASYNC = 1; // for offer, put, add - private static final int SYNC = 2; // for transfer, take - private static final int TIMEOUT = 3; // for timed poll, tryTransfer + private static final int NOW = 0; // for untimed poll, tryTransfer + private static final int ASYNC = 1; // for offer, put, add + private static final int SYNC = 2; // for transfer, take + private static final int TIMED = 3; // for timed poll, tryTransfer + + @SuppressWarnings("unchecked") + static E cast(Object item) { + assert item == null || item.getClass() != Node.class; + return (E) item; + } /** * Implements all queuing methods. See above for explanation. * * @param e the item or null for take * @param haveData true if this is a put, else a take - * @param how NOW, ASYNC, SYNC, or TIMEOUT - * @param nanos timeout in nanosecs, used only if mode is TIMEOUT + * @param how NOW, ASYNC, SYNC, or TIMED + * @param nanos timeout in nanosecs, used only if mode is TIMED * @return an item if matched, else e * @throws NullPointerException if haveData mode but e is null */ - private Object xfer(Object e, boolean haveData, int how, long nanos) { + private E xfer(E e, boolean haveData, int how, long nanos) { if (haveData && (e == null)) throw new NullPointerException(); Node s = null; // the node to append, if needed @@ -496,35 +516,34 @@ public class LinkedTransferQueue exte if (isData == haveData) // can't match break; if (p.casItem(item, e)) { // match - Thread w = p.waiter; - while (p != h) { // update head - Node n = p.next; // by 2 unless singleton - if (n != null) - p = n; - if (head == h && casHead(h, p)) { + for (Node q = p; q != h;) { + Node n = q.next; // update head by 2 + if (n != null) // unless singleton + q = n; + if (head == h && casHead(h, q)) { h.forgetNext(); break; } // advance and retry if ((h = head) == null || - (p = h.next) == null || !p.isMatched()) + (q = h.next) == null || 
!q.isMatched()) break; // unless slack < 2 } - LockSupport.unpark(w); - return item; + LockSupport.unpark(p.waiter); + return this.cast(item); } } Node n = p.next; p = (p != n) ? n : (h = head); // Use head if p offlist } - if (how >= ASYNC) { // No matches available + if (how != NOW) { // No matches available if (s == null) s = new Node(e, haveData); Node pred = tryAppend(s, haveData); if (pred == null) continue retry; // lost race vs opposite mode - if (how >= SYNC) - return awaitMatch(pred, s, e, how, nanos); + if (how != ASYNC) + return awaitMatch(s, pred, e, (how == TIMED), nanos); } return e; // not waiting } @@ -540,7 +559,7 @@ public class LinkedTransferQueue exte * predecessor */ private Node tryAppend(Node s, boolean haveData) { - for (Node t = tail, p = t;;) { // move p to last node and append + for (Node t = tail, p = t;;) { // move p to last node and append Node n, u; // temps for reads of next & tail if (p == null && (p = head) == null) { if (casHead(null, s)) @@ -568,16 +587,17 @@ public class LinkedTransferQueue exte /** * Spins/yields/blocks until node s is matched or caller gives up. * - * @param pred the predecessor of s, or s or null if none * @param s the waiting node + * @param pred the predecessor of s, or s itself if it has no + * predecessor, or null if unknown (the null case does not occur + * in any current calls but may in possible future extensions) * @param e the comparison value for checking match - * @param how either SYNC or TIMEOUT - * @param nanos timeout value + * @param timed if true, wait only until timeout elapses + * @param nanos timeout in nanosecs, used only if timed is true * @return matched item, or e if unmatched on interrupt or timeout */ - private Object awaitMatch(Node pred, Node s, Object e, - int how, long nanos) { - long lastTime = (how == TIMEOUT) ? System.nanoTime() : 0L; + private E awaitMatch(Node s, Node pred, E e, boolean timed, long nanos) { + long lastTime = timed ? 
System.nanoTime() : 0L; Thread w = Thread.currentThread(); int spins = -1; // initialized after first item and cancel checks ThreadLocalRandom randomYields = null; // bound if needed @@ -585,11 +605,12 @@ public class LinkedTransferQueue exte for (;;) { Object item = s.item; if (item != e) { // matched + assert item != s; s.forgetContents(); // avoid garbage - return item; + return this.cast(item); } - if ((w.isInterrupted() || (how == TIMEOUT && nanos <= 0)) && - s.casItem(e, s)) { // cancel + if ((w.isInterrupted() || (timed && nanos <= 0)) && + s.casItem(e, s)) { // cancel unsplice(pred, s); return e; } @@ -598,16 +619,16 @@ public class LinkedTransferQueue exte if ((spins = spinsFor(pred, s.isData)) > 0) randomYields = ThreadLocalRandom.current(); } - else if (spins > 0) { // spin, occasionally yield - if (randomYields.nextInt(FRONT_SPINS) == 0) - Thread.yield(); - --spins; + else if (spins > 0) { // spin + if (--spins == 0) + shortenHeadPath(); // reduce slack before blocking + else if (randomYields.nextInt(CHAINED_SPINS) == 0) + Thread.yield(); // occasionally yield } else if (s.waiter == null) { - shortenHeadPath(); // reduce slack before blocking - s.waiter = w; // request unpark + s.waiter = w; // request unpark then recheck } - else if (how == TIMEOUT) { + else if (timed) { long now = System.nanoTime(); if ((nanos -= now - lastTime) > 0) LockSupport.parkNanos(this, nanos); @@ -615,6 +636,7 @@ public class LinkedTransferQueue exte } else { LockSupport.park(this); + s.waiter = null; spins = -1; // spin if front upon wakeup } } @@ -626,10 +648,9 @@ public class LinkedTransferQueue exte */ private static int spinsFor(Node pred, boolean haveData) { if (MP && pred != null) { - boolean predData = pred.isData; - if (predData != haveData) // front and phase change - return FRONT_SPINS + (FRONT_SPINS >>> 1); - if (predData != (pred.item != null)) // probably at front + if (pred.isData != haveData) // phase change + return FRONT_SPINS + CHAINED_SPINS; + if 
(pred.isMatched()) // probably at front return FRONT_SPINS; if (pred.waiter == null) // pred apparently spinning return CHAINED_SPINS; @@ -661,31 +682,40 @@ public class LinkedTransferQueue exte /* -------------- Traversal methods -------------- */ /** + * Returns the successor of p, or the head node if p.next has been + * linked to self, which will only be true if traversing with a + * stale pointer that is now off the list. + */ + final Node succ(Node p) { + Node next = p.next; + return (p == next) ? head : next; + } + + /** * Returns the first unmatched node of the given mode, or null if * none. Used by methods isEmpty, hasWaitingConsumer. */ - private Node firstOfMode(boolean data) { - for (Node p = head; p != null; ) { + private Node firstOfMode(boolean isData) { + for (Node p = head; p != null; p = succ(p)) { if (!p.isMatched()) - return (p.isData == data) ? p : null; - Node n = p.next; - p = (n != p) ? n : head; + return (p.isData == isData) ? p : null; } return null; } /** * Returns the item in the first unmatched node with isData; or - * null if none. Used by peek. + * null if none. Used by peek. */ - private Object firstDataItem() { - for (Node p = head; p != null; ) { - boolean isData = p.isData; + private E firstDataItem() { + for (Node p = head; p != null; p = succ(p)) { Object item = p.item; - if (item != p && (item != null) == isData) - return isData ? item : null; - Node n = p.next; - p = (n != p) ? 
n : head; + if (p.isData) { + if (item != null && item != p) + return this.cast(item); + } + else if (item == null) + return null; } return null; } @@ -716,30 +746,28 @@ public class LinkedTransferQueue exte final class Itr implements Iterator { private Node nextNode; // next node to return item for - private Object nextItem; // the corresponding item + private E nextItem; // the corresponding item private Node lastRet; // last returned node, to support remove + private Node lastPred; // predecessor to unlink lastRet /** * Moves to next node after prev, or first node if prev null. */ private void advance(Node prev) { + lastPred = lastRet; lastRet = prev; - Node p; - if (prev == null || (p = prev.next) == prev) - p = head; - while (p != null) { + for (Node p = (prev == null) ? head : succ(prev); + p != null; p = succ(p)) { Object item = p.item; if (p.isData) { if (item != null && item != p) { - nextItem = item; + nextItem = LinkedTransferQueue.this.cast(item); nextNode = p; return; } } else if (item == null) break; - Node n = p.next; - p = (n != p) ? n : head; } nextNode = null; } @@ -755,16 +783,15 @@ public class LinkedTransferQueue exte public final E next() { Node p = nextNode; if (p == null) throw new NoSuchElementException(); - Object e = nextItem; + E e = nextItem; advance(p); - return (E) e; + return e; } public final void remove() { Node p = lastRet; if (p == null) throw new IllegalStateException(); - lastRet = null; - findAndRemoveNode(p); + findAndRemoveDataNode(lastPred, p); } } @@ -798,8 +825,10 @@ public class LinkedTransferQueue exte break; } if (oldpred == pred || // Already saved - (oldpred == null && casCleanMe(null, pred))) - break; // Postpone cleaning + ((oldpred == null || oldpred.next == s) && + casCleanMe(oldpred, pred))) { + break; + } } } } @@ -840,23 +869,28 @@ public class LinkedTransferQueue exte /** * Main implementation of Iterator.remove(). Find - * and unsplice the given node. + * and unsplice the given data node. 
+ * @param possiblePred possible predecessor of s + * @param s the node to remove */ - final void findAndRemoveNode(Node s) { + final void findAndRemoveDataNode(Node possiblePred, Node s) { + assert s.isData; if (s.tryMatchData()) { - Node pred = null; - Node p = head; - while (p != null) { - if (p == s) { - unsplice(pred, p); - break; - } - if (!p.isData && !p.isMatched()) - break; - pred = p; - if ((p = p.next) == pred) { // stale - pred = null; - p = head; + if (possiblePred != null && possiblePred.next == s) + unsplice(possiblePred, s); // was actual predecessor + else { + for (Node pred = null, p = head; p != null; ) { + if (p == s) { + unsplice(pred, p); + break; + } + if (p.isUnmatchedRequest()) + break; + pred = p; + if ((p = p.next) == pred) { // stale + pred = null; + p = head; + } } } } @@ -867,9 +901,7 @@ public class LinkedTransferQueue exte */ private boolean findAndRemove(Object e) { if (e != null) { - Node pred = null; - Node p = head; - while (p != null) { + for (Node pred = null, p = head; p != null; ) { Object item = p.item; if (p.isData) { if (item != null && item != p && e.equals(item) && @@ -881,7 +913,7 @@ public class LinkedTransferQueue exte else if (item == null) break; pred = p; - if ((p = p.next) == pred) { + if ((p = p.next) == pred) { // stale pred = null; p = head; } @@ -1009,7 +1041,7 @@ public class LinkedTransferQueue exte */ public boolean tryTransfer(E e, long timeout, TimeUnit unit) throws InterruptedException { - if (xfer(e, true, TIMEOUT, unit.toNanos(timeout)) == null) + if (xfer(e, true, TIMED, unit.toNanos(timeout)) == null) return true; if (!Thread.interrupted()) return false; @@ -1017,22 +1049,22 @@ public class LinkedTransferQueue exte } public E take() throws InterruptedException { - Object e = xfer(null, false, SYNC, 0); + E e = xfer(null, false, SYNC, 0); if (e != null) - return (E)e; + return e; Thread.interrupted(); throw new InterruptedException(); } public E poll(long timeout, TimeUnit unit) throws 
InterruptedException { - Object e = xfer(null, false, TIMEOUT, unit.toNanos(timeout)); + E e = xfer(null, false, TIMED, unit.toNanos(timeout)); if (e != null || !Thread.interrupted()) - return (E)e; + return e; throw new InterruptedException(); } public E poll() { - return (E)xfer(null, false, NOW, 0); + return xfer(null, false, NOW, 0); } /** @@ -1089,7 +1121,7 @@ public class LinkedTransferQueue exte } public E peek() { - return (E) firstDataItem(); + return firstDataItem(); } /** @@ -1185,7 +1217,6 @@ public class LinkedTransferQueue exte } } - // Unsafe mechanics private static final sun.misc.Unsafe UNSAFE = getUnsafe(); @@ -1208,7 +1239,14 @@ public class LinkedTransferQueue exte } } - private static sun.misc.Unsafe getUnsafe() { + /** + * Returns a sun.misc.Unsafe. Suitable for use in a 3rd party package. + * Replace with a simple call to Unsafe.getUnsafe when integrating + * into a jdk. + * + * @return a sun.misc.Unsafe + */ + static sun.misc.Unsafe getUnsafe() { try { return sun.misc.Unsafe.getUnsafe(); } catch (SecurityException se) {