1 |
|
/* |
2 |
|
* Written by Doug Lea and released to the public domain, as explained at |
3 |
< |
* http://creativecommons.org/licenses/publicdomain |
3 |
> |
* http://creativecommons.org/publicdomain/zero/1.0/ |
4 |
|
*/ |
5 |
|
|
6 |
|
/* |
11 |
|
* just compare and conditionally store (int) values, so are |
12 |
|
* not intended to measure the "raw" cost of a CAS. |
13 |
|
* |
14 |
< |
* Outputs, for runs using 1 ... #cpus threads are, in nanoseconds: |
14 |
> |
* Outputs, in nanoseconds: |
15 |
|
* "Atomic CAS" AtomicInteger.compareAndSet |
16 |
+ |
* "Updater CAS" CAS first comparing args |
17 |
|
* "Volatile" pseudo-CAS using volatile store if comparison succeeds |
18 |
|
* "Mutex" emulated compare and set done under AQS-based mutex lock |
19 |
|
* "Synchronized" emulated compare and set done under a synchronized block. |
20 |
|
* |
21 |
< |
* The last two are done only if this program is called with (any) argument |
21 |
> |
* By default, these are printed for 1..#cpus threads, but you can |
22 |
> |
* change the upper bound number of threads by providing the |
23 |
> |
* first argument to this program. |
24 |
> |
* |
25 |
> |
* The last two kinds of runs (mutex and synchronized) are done only |
26 |
> |
* if this program is called with (any) second argument |
27 |
|
*/ |
28 |
|
|
29 |
< |
|
29 |
> |
import java.util.concurrent.CyclicBarrier; |
30 |
|
import java.util.concurrent.atomic.AtomicInteger; |
31 |
+ |
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; |
32 |
|
import java.util.concurrent.atomic.AtomicLong; |
33 |
< |
import java.util.concurrent.*; |
27 |
< |
import java.util.concurrent.locks.*; |
33 |
> |
import java.util.concurrent.locks.AbstractQueuedSynchronizer; |
34 |
|
|
35 |
|
public class CASLoops { |
36 |
|
|
37 |
|
static final int TRIALS = 2; |
38 |
|
static final long BASE_SECS_PER_RUN = 4; |
39 |
|
static final int NCPUS = Runtime.getRuntime().availableProcessors(); |
40 |
+ |
static int maxThreads = NCPUS; |
41 |
|
|
42 |
|
static boolean includeLocks = false; |
43 |
|
|
44 |
|
public static void main(String[] args) throws Exception { |
45 |
|
if (args.length > 0) |
46 |
+ |
maxThreads = Integer.parseInt(args[0]); |
47 |
+ |
|
48 |
+ |
loopIters = new long[maxThreads+1]; |
49 |
+ |
|
50 |
+ |
if (args.length > 1) |
51 |
|
includeLocks = true; |
52 |
|
|
53 |
|
System.out.println("Warmup..."); |
54 |
+ |
for (int i = maxThreads; i > 0; --i) { |
55 |
+ |
runCalibration(i, 10); |
56 |
+ |
oneRun(i, loopIters[i] / 4, false); |
57 |
+ |
System.out.print("."); |
58 |
+ |
} |
59 |
+ |
|
60 |
+ |
for (int i = 1; i <= maxThreads; ++i) |
61 |
+ |
loopIters[i] = 0; |
62 |
+ |
|
63 |
|
for (int j = 0; j < 2; ++j) { |
64 |
< |
for (int i = 1; i <= NCPUS; ++i) { |
64 |
> |
for (int i = 1; i <= maxThreads; ++i) { |
65 |
|
runCalibration(i, 1000); |
66 |
|
oneRun(i, loopIters[i] / 8, false); |
67 |
+ |
System.out.print("."); |
68 |
|
} |
69 |
|
} |
70 |
|
|
71 |
< |
for (int i = 1; i <= NCPUS; ++i) |
71 |
> |
for (int i = 1; i <= maxThreads; ++i) |
72 |
|
loopIters[i] = 0; |
73 |
|
|
74 |
|
for (int j = 0; j < TRIALS; ++j) { |
75 |
|
System.out.println("Trial " + j); |
76 |
< |
for (int i = 1; i <= NCPUS; ++i) { |
76 |
> |
for (int i = 1; i <= maxThreads; ++i) { |
77 |
|
runCalibration(i, BASE_SECS_PER_RUN * 1000L); |
78 |
|
oneRun(i, loopIters[i], true); |
79 |
|
} |
86 |
|
|
87 |
|
static final LoopHelpers.MarsagliaRandom rng = new LoopHelpers.MarsagliaRandom(); |
88 |
|
|
89 |
< |
static final long[] loopIters = new long[NCPUS+1]; |
89 |
> |
static long[] loopIters; |
90 |
|
|
91 |
|
static final class NonAtomicInteger { |
92 |
|
volatile int readBarrier; |
107 |
|
void set(int val) { value = val; } |
108 |
|
} |
109 |
|
|
110 |
+ |
static final class UpdaterAtomicInteger { |
111 |
+ |
volatile int value; |
112 |
+ |
|
113 |
+ |
static final AtomicIntegerFieldUpdater<UpdaterAtomicInteger> |
114 |
+ |
valueUpdater = AtomicIntegerFieldUpdater.newUpdater |
115 |
+ |
(UpdaterAtomicInteger.class, "value"); |
116 |
+ |
|
117 |
+ |
UpdaterAtomicInteger() {} |
118 |
+ |
int get() { |
119 |
+ |
return value; |
120 |
+ |
} |
121 |
+ |
boolean compareAndSet(int cmp, int val) { |
122 |
+ |
return valueUpdater.compareAndSet(this, cmp, val); |
123 |
+ |
} |
124 |
+ |
|
125 |
+ |
void set(int val) { value = val; } |
126 |
+ |
} |
127 |
+ |
|
128 |
|
static final class VolatileInteger { |
129 |
|
volatile int value; |
130 |
|
|
159 |
|
synchronized void set(int val) { value = val; } |
160 |
|
} |
161 |
|
|
122 |
– |
|
162 |
|
static final class LockedInteger extends AbstractQueuedSynchronizer { |
163 |
|
int value; |
164 |
|
LockedInteger() {} |
188 |
|
unlock(); |
189 |
|
} |
190 |
|
} |
191 |
< |
void set(int val) { |
192 |
< |
lock(); |
191 |
> |
void set(int val) { |
192 |
> |
lock(); |
193 |
|
try { |
194 |
< |
value = val; |
194 |
> |
value = val; |
195 |
|
} finally { |
196 |
|
unlock(); |
197 |
|
} |
198 |
|
} |
199 |
|
} |
200 |
|
|
201 |
< |
// All these versions are copy-paste-hacked to avoid |
201 |
> |
// All these versions are copy-paste-hacked to avoid |
202 |
|
// contamination with virtual call resolution etc. |
203 |
|
|
204 |
|
// Use fixed-length unrollable inner loops to reduce safepoint checks |
217 |
|
|
218 |
|
public void run() { |
219 |
|
try { |
220 |
< |
barrier.await(); |
220 |
> |
barrier.await(); |
221 |
|
long i = iters; |
222 |
|
int y = 0; |
223 |
|
int succ = 0; |
235 |
|
successes.getAndAdd(succ); |
236 |
|
barrier.await(); |
237 |
|
} |
238 |
< |
catch (Exception ie) { |
239 |
< |
return; |
238 |
> |
catch (Exception ie) { |
239 |
> |
return; |
240 |
|
} |
241 |
|
} |
242 |
|
} |
254 |
|
|
255 |
|
public void run() { |
256 |
|
try { |
257 |
< |
barrier.await(); |
257 |
> |
barrier.await(); |
258 |
> |
long i = iters; |
259 |
> |
int y = 0; |
260 |
> |
int succ = 0; |
261 |
> |
while (i > 0) { |
262 |
> |
for (int k = 0; k < innerPerOuter; ++k) { |
263 |
> |
int x = obj.get(); |
264 |
> |
int z = y + LoopHelpers.compute6(x); |
265 |
> |
if (obj.compareAndSet(x, z)) |
266 |
> |
++succ; |
267 |
> |
y = LoopHelpers.compute7(z); |
268 |
> |
} |
269 |
> |
i -= innerPerOuter; |
270 |
> |
} |
271 |
> |
sum.getAndAdd(obj.get()); |
272 |
> |
successes.getAndAdd(succ); |
273 |
> |
barrier.await(); |
274 |
> |
} |
275 |
> |
catch (Exception ie) { |
276 |
> |
return; |
277 |
> |
} |
278 |
> |
} |
279 |
> |
} |
280 |
> |
|
281 |
> |
static final class UpdaterAtomicLoop implements Runnable { |
282 |
> |
final long iters; |
283 |
> |
final UpdaterAtomicInteger obj; |
284 |
> |
final CyclicBarrier barrier; |
285 |
> |
UpdaterAtomicLoop(long iters, UpdaterAtomicInteger obj, CyclicBarrier b) { |
286 |
> |
this.iters = iters; |
287 |
> |
this.obj = obj; |
288 |
> |
this.barrier = b; |
289 |
> |
obj.set(rng.next()); |
290 |
> |
} |
291 |
> |
|
292 |
> |
public void run() { |
293 |
> |
try { |
294 |
> |
barrier.await(); |
295 |
|
long i = iters; |
296 |
|
int y = 0; |
297 |
|
int succ = 0; |
309 |
|
successes.getAndAdd(succ); |
310 |
|
barrier.await(); |
311 |
|
} |
312 |
< |
catch (Exception ie) { |
313 |
< |
return; |
312 |
> |
catch (Exception ie) { |
313 |
> |
return; |
314 |
|
} |
315 |
|
} |
316 |
|
} |
328 |
|
|
329 |
|
public void run() { |
330 |
|
try { |
331 |
< |
barrier.await(); |
331 |
> |
barrier.await(); |
332 |
|
long i = iters; |
333 |
|
int y = 0; |
334 |
|
int succ = 0; |
346 |
|
successes.getAndAdd(succ); |
347 |
|
barrier.await(); |
348 |
|
} |
349 |
< |
catch (Exception ie) { |
350 |
< |
return; |
349 |
> |
catch (Exception ie) { |
350 |
> |
return; |
351 |
|
} |
352 |
|
} |
353 |
|
} |
365 |
|
|
366 |
|
public void run() { |
367 |
|
try { |
368 |
< |
barrier.await(); |
368 |
> |
barrier.await(); |
369 |
|
long i = iters; |
370 |
|
int y = 0; |
371 |
|
int succ = 0; |
383 |
|
successes.getAndAdd(succ); |
384 |
|
barrier.await(); |
385 |
|
} |
386 |
< |
catch (Exception ie) { |
387 |
< |
return; |
386 |
> |
catch (Exception ie) { |
387 |
> |
return; |
388 |
|
} |
389 |
|
} |
390 |
|
} |
402 |
|
|
403 |
|
public void run() { |
404 |
|
try { |
405 |
< |
barrier.await(); |
405 |
> |
barrier.await(); |
406 |
|
long i = iters; |
407 |
|
int y = 0; |
408 |
|
int succ = 0; |
420 |
|
successes.getAndAdd(succ); |
421 |
|
barrier.await(); |
422 |
|
} |
423 |
< |
catch (Exception ie) { |
424 |
< |
return; |
423 |
> |
catch (Exception ie) { |
424 |
> |
return; |
425 |
|
} |
426 |
|
} |
427 |
|
} |
441 |
|
|
442 |
|
public void run() { |
443 |
|
try { |
444 |
< |
barrier.await(); |
444 |
> |
barrier.await(); |
445 |
|
long iters = 0; |
446 |
|
int y = 0; |
447 |
|
int succ = 0; |
464 |
|
successes.getAndAdd(succ); |
465 |
|
barrier.await(); |
466 |
|
} |
467 |
< |
catch (Exception ie) { |
468 |
< |
return; |
467 |
> |
catch (Exception ie) { |
468 |
> |
return; |
469 |
|
} |
470 |
|
} |
471 |
|
} |
476 |
|
CyclicBarrier b = new CyclicBarrier(n+1); |
477 |
|
totalIters.set(0); |
478 |
|
NonAtomicInteger a = new NonAtomicInteger(); |
479 |
< |
for (int j = 0; j < n; ++j) |
479 |
> |
for (int j = 0; j < n; ++j) |
480 |
|
new Thread(new NACalibrationLoop(endTime, a, b)).start(); |
481 |
|
b.await(); |
482 |
|
b.await(); |
490 |
|
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
491 |
|
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
492 |
|
NonAtomicInteger a = new NonAtomicInteger(); |
493 |
< |
for (int j = 0; j < n; ++j) |
493 |
> |
for (int j = 0; j < n; ++j) |
494 |
|
new Thread(new NonAtomicLoop(iters, a, b)).start(); |
495 |
|
b.await(); |
496 |
|
b.await(); |
498 |
|
return timer.getTime(); |
499 |
|
} |
500 |
|
|
501 |
+ |
static long runUpdaterAtomic(int n, long iters) throws Exception { |
502 |
+ |
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
503 |
+ |
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
504 |
+ |
UpdaterAtomicInteger a = new UpdaterAtomicInteger(); |
505 |
+ |
for (int j = 0; j < n; ++j) |
506 |
+ |
new Thread(new UpdaterAtomicLoop(iters, a, b)).start(); |
507 |
+ |
b.await(); |
508 |
+ |
b.await(); |
509 |
+ |
if (sum.get() == 0) System.out.print(" "); |
510 |
+ |
return timer.getTime(); |
511 |
+ |
} |
512 |
+ |
|
513 |
|
static long runAtomic(int n, long iters) throws Exception { |
514 |
|
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
515 |
|
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
516 |
|
AtomicInteger a = new AtomicInteger(); |
517 |
< |
for (int j = 0; j < n; ++j) |
517 |
> |
for (int j = 0; j < n; ++j) |
518 |
|
new Thread(new AtomicLoop(iters, a, b)).start(); |
519 |
|
b.await(); |
520 |
|
b.await(); |
526 |
|
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
527 |
|
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
528 |
|
VolatileInteger a = new VolatileInteger(); |
529 |
< |
for (int j = 0; j < n; ++j) |
529 |
> |
for (int j = 0; j < n; ++j) |
530 |
|
new Thread(new VolatileLoop(iters, a, b)).start(); |
531 |
|
b.await(); |
532 |
|
b.await(); |
534 |
|
return timer.getTime(); |
535 |
|
} |
536 |
|
|
449 |
– |
|
537 |
|
static long runSynched(int n, long iters) throws Exception { |
538 |
|
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
539 |
|
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
540 |
|
SynchedInteger a = new SynchedInteger(); |
541 |
< |
for (int j = 0; j < n; ++j) |
541 |
> |
for (int j = 0; j < n; ++j) |
542 |
|
new Thread(new SynchedLoop(iters, a, b)).start(); |
543 |
|
b.await(); |
544 |
|
b.await(); |
550 |
|
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
551 |
|
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
552 |
|
LockedInteger a = new LockedInteger(); |
553 |
< |
for (int j = 0; j < n; ++j) |
553 |
> |
for (int j = 0; j < n; ++j) |
554 |
|
new Thread(new LockedLoop(iters, a, b)).start(); |
555 |
|
b.await(); |
556 |
|
b.await(); |
558 |
|
return timer.getTime(); |
559 |
|
} |
560 |
|
|
561 |
< |
static void report(String tag, long runtime, long basetime, long iters) { |
561 |
> |
static void report(String tag, long runtime, long basetime, |
562 |
> |
int nthreads, long iters) { |
563 |
|
System.out.print(tag); |
564 |
< |
System.out.print(LoopHelpers.rightJustify((runtime - basetime) / iters) + " ns"); |
565 |
< |
double secs = (double)(runtime) / 1000000000.0; |
564 |
> |
long t = (runtime - basetime) / iters; |
565 |
> |
if (nthreads > NCPUS) |
566 |
> |
t = t * NCPUS / nthreads; |
567 |
> |
System.out.print(LoopHelpers.rightJustify(t)); |
568 |
> |
double secs = (double) runtime / 1000000000.0; |
569 |
|
System.out.println("\t " + secs + "s run time"); |
570 |
|
} |
480 |
– |
|
571 |
|
|
572 |
|
static void oneRun(int i, long iters, boolean print) throws Exception { |
483 |
– |
System.out.println("threads : " + i + |
484 |
– |
" base iters per thread per run : " + |
485 |
– |
LoopHelpers.rightJustify(loopIters[i])); |
486 |
– |
long ntime = runNonAtomic(i, iters); |
573 |
|
if (print) |
574 |
< |
report("Base : ", ntime, ntime, iters); |
574 |
> |
System.out.println("threads : " + i + |
575 |
> |
" base iters per thread per run : " + |
576 |
> |
LoopHelpers.rightJustify(loopIters[i])); |
577 |
> |
long ntime = runNonAtomic(i, iters); |
578 |
> |
if (print) |
579 |
> |
report("Base : ", ntime, ntime, i, iters); |
580 |
|
Thread.sleep(100L); |
581 |
|
long atime = runAtomic(i, iters); |
582 |
|
if (print) |
583 |
< |
report("Atomic CAS : ", atime, ntime, iters); |
583 |
> |
report("Atomic CAS : ", atime, ntime, i, iters); |
584 |
> |
Thread.sleep(100L); |
585 |
> |
long gtime = runUpdaterAtomic(i, iters); |
586 |
> |
if (print) |
587 |
> |
report("Updater CAS : ", gtime, ntime, i, iters); |
588 |
|
Thread.sleep(100L); |
589 |
|
long vtime = runVolatile(i, iters); |
590 |
|
if (print) |
591 |
< |
report("Volatile : ", vtime, ntime, iters); |
591 |
> |
report("Volatile : ", vtime, ntime, i, iters); |
592 |
|
|
593 |
|
Thread.sleep(100L); |
594 |
|
if (!includeLocks) return; |
595 |
|
long mtime = runLocked(i, iters); |
596 |
|
if (print) |
597 |
< |
report("Mutex : ", mtime, ntime, iters); |
597 |
> |
report("Mutex : ", mtime, ntime, i, iters); |
598 |
|
Thread.sleep(100L); |
599 |
|
long stime = runSynched(i, iters); |
600 |
< |
if (print) |
601 |
< |
report("Synchronized: ", stime, ntime, iters); |
600 |
> |
if (print) |
601 |
> |
report("Synchronized: ", stime, ntime, i, iters); |
602 |
|
Thread.sleep(100L); |
603 |
|
} |
509 |
– |
|
510 |
– |
|
604 |
|
} |