/*
* Written by Doug Lea, Bill Scherer, and Michael Scott with
* assistance from members of JCP JSR-166 Expert Group and released to
* the public domain, as explained at
* http://creativecommons.org/licenses/publicdomain
*/
package java.util.concurrent;
import java.util.concurrent.*; // for javadoc (till 6280605 is fixed)
import java.util.concurrent.locks.*;
import java.util.concurrent.atomic.*;
import java.util.Random;
/**
* A synchronization point at which threads can pair and swap elements
* within pairs. Each thread presents some object on entry to the
* {@link #exchange exchange} method, matches with a partner thread,
* and receives its partner's object on return.
*
*
* <p><b>Sample Usage:</b>
* Here are the highlights of a class that uses an {@code Exchanger}
* to swap buffers between threads so that the thread filling the
* buffer gets a freshly emptied one when it needs it, handing off the
* filled one to the thread emptying the buffer.
*
* <p>Memory consistency effects: For each pair of threads that
* successfully exchange objects via an {@code Exchanger}, actions
* prior to the {@code exchange()} in each thread
* <i>happen-before</i>
* those subsequent to a return from the corresponding {@code exchange()}
* in the other thread.
*
* @since 1.5
* @author Doug Lea and Bill Scherer and Michael Scott
* @param <V> The type of objects that may be exchanged
*/
public class Exchanger {
/*
* The underlying idea is to use a stack to hold nodes containing
* pairs of items to be exchanged. Except that:
*
* * Only one element of the pair is known on creation by a
* first-arriving thread; the other is a "hole" waiting to be
* filled in. This is a degenerate form of the dual stacks
* described in "Nonblocking Concurrent Objects with Condition
* Synchronization", by W. N. Scherer III and M. L. Scott.
* 18th Annual Conf. on Distributed Computing, Oct. 2004.
* It is "degenerate" in that both the items and the holes
* are shared in the same nodes.
*
* * There isn't really a stack here! There can't be -- if two
* nodes were both in the stack, they should cancel themselves
* out by combining. So that's what we do. The 0th element of
* the "arena" array serves only as the top of stack. The
* remainder of the array is a form of the elimination backoff
* collision array described in "A Scalable Lock-free Stack
* Algorithm", by D. Hendler, N. Shavit, and L. Yerushalmi.
* 16th ACM Symposium on Parallelism in Algorithms and
* Architectures, June 2004. Here, threads spin (using short
* timed waits with exponential backoff) looking for each
* other. If they fail to find others waiting, they try the
* top spot again. As shown in that paper, this always
* converges.
*
* The backoff elimination mechanics never come into play in
* common usages where only two threads ever meet to exchange
* items, but they prevent contention bottlenecks when an
* exchanger is used by a large number of threads.
*
* For more details, see the paper "A Scalable Elimination-based
* Exchange Channel" by William Scherer, Doug Lea, and Michael
* Scott in Proceedings of SCOOL05 workshop. Available at:
* http://hdl.handle.net/1802/2104
*/
/** The number of CPUs, for sizing and spin control */
static final int NCPUS = Runtime.getRuntime().availableProcessors();
/**
* Size of collision space. Using a size of half the number of
* CPUs provides enough space for threads to find each other but
* not so much that it would always require one or more to time
* out to become unstuck. Note that the arena array holds SIZE+1
* elements, to include the top-of-stack slot. Imposing a ceiling
* is suboptimal for huge machines, but bounds backoff times to
* acceptable values. To ensure max times less than 2.4 seconds,
* the ceiling value plus the shift value of backoff base (below)
* should be less than or equal to 31.
*/
private static final int SIZE = Math.min(25, (NCPUS + 1) / 2);
/**
* Base unit in nanoseconds for backoffs. Must be a power of two.
* Should be small because backoffs exponentially increase from base.
* The value should be close to the round-trip time of a call to
* LockSupport.park in the case where some other thread has already
* called unpark. On multiprocessors, timed waits less than this value
* are implemented by spinning.
*/
static final long BACKOFF_BASE = (1L << 6); // 64 nanoseconds
/**
* The number of nanoseconds for which it is faster to spin rather
* than to use timed park. Should normally be zero on
* uniprocessors and BACKOFF_BASE on multiprocessors.
*/
static final long spinForTimeoutThreshold = (NCPUS < 2) ? 0 : BACKOFF_BASE;
/**
* The number of times to spin before blocking in timed waits.
* The value is empirically derived -- it works well across a
* variety of processors and OSes. Empirically, the best value
* seems not to vary with number of CPUs (beyond 2) so is just
* a constant.
*/
static final int maxTimedSpins = (NCPUS < 2) ? 0 : 16; // no spinning on uniprocessors
/**
* The number of times to spin before blocking in untimed waits.
* This is greater than timed value because untimed waits spin
* faster since they don't need to check times on each spin.
*/
static final int maxUntimedSpins = maxTimedSpins * 32;
/**
* Sentinel item representing cancellation. This value is placed
* in holes on cancellation, and used as a return value from Node
* methods to indicate failure to set or get hole. Compared by
* identity (==), never by equals.
*/
static final Object FAIL = new Object();
/**
* The collision arena. arena[0] is used as the top of the stack.
* The remainder is used as the collision elimination space.
* Each element holds the Node (if any) currently occupying that
* slot; doExchange installs and clears Nodes here via CAS.
*/
private final AtomicReference[] arena;
/**
* Per-thread random number generator. Because random numbers
* are used to choose slots and delays to reduce contention, the
* random number generator itself cannot introduce contention.
* And the statistical quality of the generator is not too
* important. So we use a custom cheap generator, and maintain
* it as a thread local.
* NOTE(review): class RNG is not visible in this chunk --
* presumably declared later in this file; confirm.
*/
private static final ThreadLocal random = new ThreadLocal() {
public RNG initialValue() { return new RNG(); } };
/**
* Creates a new Exchanger.
*/
public Exchanger() {
    // SIZE + 1 slots: arena[0] serves as the top-of-stack slot,
    // the remaining SIZE slots form the elimination space.
    // The same-type cast on the raw array creation (a leftover of
    // the generic-array idiom) was redundant and has been removed.
    arena = new AtomicReference[SIZE + 1];
    for (int i = 0; i < arena.length; ++i)
        arena[i] = new AtomicReference();
}
/**
* Main exchange function, handling the different policy variants.
* Uses Object, not "V" as argument and return value to simplify
* handling of internal sentinel values. Callers from public
* methods cast accordingly.
*
* Strategy: try to occupy arena[idx] with a fresh Node and wait
* for a partner to fill the Node's hole; if the slot is already
* taken, try instead to fulfill the waiter found there. A failed
* (timed-out) wait retries from the top slot with an increased
* backoff, choosing random non-top slots on slot collisions.
*
* @param item the item to exchange
* @param timed true if the wait is timed
* @param nanos if timed, the maximum wait time
* @return the other thread's item
* @throws InterruptedException if interrupted while waiting
* @throws TimeoutException if timed and the deadline elapses
*/
private Object doExchange(Object item, boolean timed, long nanos)
throws InterruptedException, TimeoutException {
long lastTime = timed ? System.nanoTime() : 0;
int idx = 0; // start out at slot representing top
int backoff = 0; // increases on failure to occupy a slot
Node me = new Node(item);
for (;;) {
AtomicReference slot = arena[idx];
Node you = slot.get();
// Try to occupy this slot
if (you == null && slot.compareAndSet(null, me)) {
// If this is top slot, use regular wait, else backoff-wait
Object v = ((idx == 0)?
me.waitForHole(timed, nanos) :
me.waitForHole(true, randomDelay(backoff)));
// Unlink our node if still installed, so later arrivals
// don't try to fulfill an abandoned node
if (slot.get() == me)
slot.compareAndSet(me, null);
if (v != FAIL) // hole was filled: exchange succeeded
return v;
if (Thread.interrupted())
throw new InterruptedException();
if (timed) {
// Deduct elapsed time from the caller's budget; this runs
// after backoff waits too, so failed waits consume it
long now = System.nanoTime();
nanos -= now - lastTime;
lastTime = now;
if (nanos <= 0)
throw new TimeoutException();
}
me = new Node(me.item); // Throw away nodes on failure
if (backoff < SIZE - 1) // Increase or stay saturated
++backoff;
idx = 0; // Restart at top
continue;
}
// Try to release waiter from apparently non-empty slot
if (you != null || (you = slot.get()) != null) {
// CAS our item into the waiter's hole; succeeds only if the
// hole is still empty (neither cancelled nor already filled)
boolean success = (you.get() == null &&
you.compareAndSet(null, me.item));
// Clear the slot regardless, whether we fulfilled it or not
if (slot.get() == you)
slot.compareAndSet(you, null);
if (success) {
you.signal(); // wake the (possibly parked) waiter
return you.item;
}
}
// Retry with a random non-top slot <= backoff
idx = backoff == 0 ? 1 : 1 + random.get().next() % (backoff + 1);
}
}
/**
* Returns a uniformly distributed random delay strictly less than
* BACKOFF_BASE * (2 to the power backoff). Works by masking a
* thread-local random value: because BACKOFF_BASE is a power of
* two, the shifted bound minus one is an all-ones bit mask.
*/
private long randomDelay(int backoff) {
    long mask = (BACKOFF_BASE << backoff) - 1;
    return mask & random.get().next();
}
/**
* Nodes hold partially exchanged data. This class
* opportunistically subclasses AtomicReference to represent the
* hole. So get() returns hole, and compareAndSet CAS'es value
* into hole. Note that this class cannot be parameterized as V
* because the sentinel value FAIL is only of type Object.
*/
static final class Node extends AtomicReference