1 |
/* |
2 |
* Written by Doug Lea and released to the public domain, as explained at |
3 |
* http://creativecommons.org/licenses/publicdomain |
4 |
*/ |
5 |
|
6 |
/* |
7 |
* Estimates the difference in time for compareAndSet and CAS-like |
8 |
* operations versus unsynchronized, non-volatile pseudo-CAS when |
9 |
* updating random numbers. These estimates thus give the cost |
10 |
* of atomicity/barriers/exclusion over and above the time to |
11 |
* just compare and conditionally store (int) values, so are |
12 |
* not intended to measure the "raw" cost of a CAS. |
13 |
* |
14 |
* Outputs, in nanoseconds: |
15 |
* "Atomic CAS" AtomicInteger.compareAndSet |
16 |
* "Updater CAS" CAS first comparing args |
17 |
* "Volatile" pseudo-CAS using volatile store if comparison succeeds |
18 |
* "Mutex" emulated compare and set done under AQS-based mutex lock |
19 |
* "Synchronized" emulated compare and set done under a synchronized block. |
20 |
* |
21 |
* By default, these are printed for 1..#cpus threads, but you can |
22 |
* change the upper bound number of threads by providing the |
23 |
* first argument to this program. |
24 |
* |
25 |
* The last two kinds of runs (mutex and synchronized) are done only |
26 |
* if this program is called with (any) second argument |
27 |
*/ |
28 |
|
29 |
|
30 |
import java.util.concurrent.atomic.AtomicInteger; |
31 |
import java.util.concurrent.atomic.AtomicLong; |
32 |
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; |
33 |
import java.util.concurrent.*; |
34 |
import java.util.concurrent.locks.*; |
35 |
|
36 |
public class CASLoops { |
37 |
|
38 |
static final int TRIALS = 2; |
39 |
static final long BASE_SECS_PER_RUN = 4; |
40 |
static final int NCPUS = Runtime.getRuntime().availableProcessors(); |
41 |
static int maxThreads = NCPUS; |
42 |
|
43 |
static boolean includeLocks = false; |
44 |
|
45 |
public static void main(String[] args) throws Exception { |
46 |
if (args.length > 0) |
47 |
maxThreads = Integer.parseInt(args[0]); |
48 |
|
49 |
if (args.length > 1) |
50 |
includeLocks = true; |
51 |
|
52 |
System.out.println("Warmup..."); |
53 |
for (int i = maxThreads; i > 0; --i) { |
54 |
runCalibration(i, 10); |
55 |
oneRun(i, loopIters[i] / 4, false); |
56 |
System.out.print("."); |
57 |
} |
58 |
|
59 |
for (int i = 1; i <= maxThreads; ++i) |
60 |
loopIters[i] = 0; |
61 |
|
62 |
for (int j = 0; j < 2; ++j) { |
63 |
for (int i = 1; i <= maxThreads; ++i) { |
64 |
runCalibration(i, 1000); |
65 |
oneRun(i, loopIters[i] / 8, false); |
66 |
System.out.print("."); |
67 |
} |
68 |
} |
69 |
|
70 |
for (int i = 1; i <= maxThreads; ++i) |
71 |
loopIters[i] = 0; |
72 |
|
73 |
for (int j = 0; j < TRIALS; ++j) { |
74 |
System.out.println("Trial " + j); |
75 |
for (int i = 1; i <= maxThreads; ++i) { |
76 |
runCalibration(i, BASE_SECS_PER_RUN * 1000L); |
77 |
oneRun(i, loopIters[i], true); |
78 |
} |
79 |
} |
80 |
} |
81 |
|
82 |
static final AtomicLong totalIters = new AtomicLong(0); |
83 |
static final AtomicLong successes = new AtomicLong(0); |
84 |
static final AtomicInteger sum = new AtomicInteger(0); |
85 |
|
86 |
static final LoopHelpers.MarsagliaRandom rng = new LoopHelpers.MarsagliaRandom(); |
87 |
|
88 |
static final long[] loopIters = new long[maxThreads+1]; |
89 |
|
90 |
static final class NonAtomicInteger { |
91 |
volatile int readBarrier; |
92 |
int value; |
93 |
|
94 |
NonAtomicInteger() {} |
95 |
int get() { |
96 |
int junk = readBarrier; |
97 |
return value; |
98 |
} |
99 |
boolean compareAndSet(int cmp, int val) { |
100 |
if (value == cmp) { |
101 |
value = val; |
102 |
return true; |
103 |
} |
104 |
return false; |
105 |
} |
106 |
void set(int val) { value = val; } |
107 |
} |
108 |
|
109 |
static final class UpdaterAtomicInteger { |
110 |
volatile int value; |
111 |
|
112 |
static final AtomicIntegerFieldUpdater<UpdaterAtomicInteger> |
113 |
valueUpdater = AtomicIntegerFieldUpdater.newUpdater |
114 |
(UpdaterAtomicInteger.class, "value"); |
115 |
|
116 |
|
117 |
UpdaterAtomicInteger() {} |
118 |
int get() { |
119 |
return value; |
120 |
} |
121 |
boolean compareAndSet(int cmp, int val) { |
122 |
return valueUpdater.compareAndSet(this, cmp, val); |
123 |
} |
124 |
|
125 |
void set(int val) { value = val; } |
126 |
} |
127 |
|
128 |
static final class VolatileInteger { |
129 |
volatile int value; |
130 |
|
131 |
VolatileInteger() {} |
132 |
int get() { |
133 |
return value; |
134 |
} |
135 |
boolean compareAndSet(int cmp, int val) { |
136 |
if (value == cmp) { |
137 |
value = val; |
138 |
return true; |
139 |
} |
140 |
return false; |
141 |
} |
142 |
void set(int val) { value = val; } |
143 |
} |
144 |
|
145 |
static final class SynchedInteger { |
146 |
int value; |
147 |
|
148 |
SynchedInteger() {} |
149 |
int get() { |
150 |
return value; |
151 |
} |
152 |
synchronized boolean compareAndSet(int cmp, int val) { |
153 |
if (value == cmp) { |
154 |
value = val; |
155 |
return true; |
156 |
} |
157 |
return false; |
158 |
} |
159 |
synchronized void set(int val) { value = val; } |
160 |
} |
161 |
|
162 |
|
163 |
static final class LockedInteger extends AbstractQueuedSynchronizer { |
164 |
int value; |
165 |
LockedInteger() {} |
166 |
|
167 |
public boolean tryAcquire(int acquires) { |
168 |
return compareAndSetState(0, 1); |
169 |
} |
170 |
public boolean tryRelease(int releases) { |
171 |
setState(0); |
172 |
return true; |
173 |
} |
174 |
void lock() { acquire(1); } |
175 |
void unlock() { release(1); } |
176 |
|
177 |
int get() { |
178 |
return value; |
179 |
} |
180 |
boolean compareAndSet(int cmp, int val) { |
181 |
lock(); |
182 |
try { |
183 |
if (value == cmp) { |
184 |
value = val; |
185 |
return true; |
186 |
} |
187 |
return false; |
188 |
} finally { |
189 |
unlock(); |
190 |
} |
191 |
} |
192 |
void set(int val) { |
193 |
lock(); |
194 |
try { |
195 |
value = val; |
196 |
} finally { |
197 |
unlock(); |
198 |
} |
199 |
} |
200 |
} |
201 |
|
202 |
// All these versions are copy-paste-hacked to avoid |
203 |
// contamination with virtual call resolution etc. |
204 |
|
205 |
// Use fixed-length unrollable inner loops to reduce safepoint checks |
206 |
static final int innerPerOuter = 16; |
207 |
|
208 |
static final class NonAtomicLoop implements Runnable { |
209 |
final long iters; |
210 |
final NonAtomicInteger obj; |
211 |
final CyclicBarrier barrier; |
212 |
NonAtomicLoop(long iters, NonAtomicInteger obj, CyclicBarrier b) { |
213 |
this.iters = iters; |
214 |
this.obj = obj; |
215 |
this.barrier = b; |
216 |
obj.set(rng.next()); |
217 |
} |
218 |
|
219 |
public void run() { |
220 |
try { |
221 |
barrier.await(); |
222 |
long i = iters; |
223 |
int y = 0; |
224 |
int succ = 0; |
225 |
while (i > 0) { |
226 |
for (int k = 0; k < innerPerOuter; ++k) { |
227 |
int x = obj.get(); |
228 |
int z = y + LoopHelpers.compute6(x); |
229 |
if (obj.compareAndSet(x, z)) |
230 |
++succ; |
231 |
y = LoopHelpers.compute7(z); |
232 |
} |
233 |
i -= innerPerOuter; |
234 |
} |
235 |
sum.getAndAdd(obj.get()); |
236 |
successes.getAndAdd(succ); |
237 |
barrier.await(); |
238 |
} |
239 |
catch (Exception ie) { |
240 |
return; |
241 |
} |
242 |
} |
243 |
} |
244 |
|
245 |
static final class AtomicLoop implements Runnable { |
246 |
final long iters; |
247 |
final AtomicInteger obj; |
248 |
final CyclicBarrier barrier; |
249 |
AtomicLoop(long iters, AtomicInteger obj, CyclicBarrier b) { |
250 |
this.iters = iters; |
251 |
this.obj = obj; |
252 |
this.barrier = b; |
253 |
obj.set(rng.next()); |
254 |
} |
255 |
|
256 |
public void run() { |
257 |
try { |
258 |
barrier.await(); |
259 |
long i = iters; |
260 |
int y = 0; |
261 |
int succ = 0; |
262 |
while (i > 0) { |
263 |
for (int k = 0; k < innerPerOuter; ++k) { |
264 |
int x = obj.get(); |
265 |
int z = y + LoopHelpers.compute6(x); |
266 |
if (obj.compareAndSet(x, z)) |
267 |
++succ; |
268 |
y = LoopHelpers.compute7(z); |
269 |
} |
270 |
i -= innerPerOuter; |
271 |
} |
272 |
sum.getAndAdd(obj.get()); |
273 |
successes.getAndAdd(succ); |
274 |
barrier.await(); |
275 |
} |
276 |
catch (Exception ie) { |
277 |
return; |
278 |
} |
279 |
} |
280 |
} |
281 |
|
282 |
static final class UpdaterAtomicLoop implements Runnable { |
283 |
final long iters; |
284 |
final UpdaterAtomicInteger obj; |
285 |
final CyclicBarrier barrier; |
286 |
UpdaterAtomicLoop(long iters, UpdaterAtomicInteger obj, CyclicBarrier b) { |
287 |
this.iters = iters; |
288 |
this.obj = obj; |
289 |
this.barrier = b; |
290 |
obj.set(rng.next()); |
291 |
} |
292 |
|
293 |
public void run() { |
294 |
try { |
295 |
barrier.await(); |
296 |
long i = iters; |
297 |
int y = 0; |
298 |
int succ = 0; |
299 |
while (i > 0) { |
300 |
for (int k = 0; k < innerPerOuter; ++k) { |
301 |
int x = obj.get(); |
302 |
int z = y + LoopHelpers.compute6(x); |
303 |
if (obj.compareAndSet(x, z)) |
304 |
++succ; |
305 |
y = LoopHelpers.compute7(z); |
306 |
} |
307 |
i -= innerPerOuter; |
308 |
} |
309 |
sum.getAndAdd(obj.get()); |
310 |
successes.getAndAdd(succ); |
311 |
barrier.await(); |
312 |
} |
313 |
catch (Exception ie) { |
314 |
return; |
315 |
} |
316 |
} |
317 |
} |
318 |
|
319 |
static final class VolatileLoop implements Runnable { |
320 |
final long iters; |
321 |
final VolatileInteger obj; |
322 |
final CyclicBarrier barrier; |
323 |
VolatileLoop(long iters, VolatileInteger obj, CyclicBarrier b) { |
324 |
this.iters = iters; |
325 |
this.obj = obj; |
326 |
this.barrier = b; |
327 |
obj.set(rng.next()); |
328 |
} |
329 |
|
330 |
public void run() { |
331 |
try { |
332 |
barrier.await(); |
333 |
long i = iters; |
334 |
int y = 0; |
335 |
int succ = 0; |
336 |
while (i > 0) { |
337 |
for (int k = 0; k < innerPerOuter; ++k) { |
338 |
int x = obj.get(); |
339 |
int z = y + LoopHelpers.compute6(x); |
340 |
if (obj.compareAndSet(x, z)) |
341 |
++succ; |
342 |
y = LoopHelpers.compute7(z); |
343 |
} |
344 |
i -= innerPerOuter; |
345 |
} |
346 |
sum.getAndAdd(obj.get()); |
347 |
successes.getAndAdd(succ); |
348 |
barrier.await(); |
349 |
} |
350 |
catch (Exception ie) { |
351 |
return; |
352 |
} |
353 |
} |
354 |
} |
355 |
|
356 |
static final class SynchedLoop implements Runnable { |
357 |
final long iters; |
358 |
final SynchedInteger obj; |
359 |
final CyclicBarrier barrier; |
360 |
SynchedLoop(long iters, SynchedInteger obj, CyclicBarrier b) { |
361 |
this.iters = iters; |
362 |
this.obj = obj; |
363 |
this.barrier = b; |
364 |
obj.set(rng.next()); |
365 |
} |
366 |
|
367 |
public void run() { |
368 |
try { |
369 |
barrier.await(); |
370 |
long i = iters; |
371 |
int y = 0; |
372 |
int succ = 0; |
373 |
while (i > 0) { |
374 |
for (int k = 0; k < innerPerOuter; ++k) { |
375 |
int x = obj.get(); |
376 |
int z = y + LoopHelpers.compute6(x); |
377 |
if (obj.compareAndSet(x, z)) |
378 |
++succ; |
379 |
y = LoopHelpers.compute7(z); |
380 |
} |
381 |
i -= innerPerOuter; |
382 |
} |
383 |
sum.getAndAdd(obj.get()); |
384 |
successes.getAndAdd(succ); |
385 |
barrier.await(); |
386 |
} |
387 |
catch (Exception ie) { |
388 |
return; |
389 |
} |
390 |
} |
391 |
} |
392 |
|
393 |
static final class LockedLoop implements Runnable { |
394 |
final long iters; |
395 |
final LockedInteger obj; |
396 |
final CyclicBarrier barrier; |
397 |
LockedLoop(long iters, LockedInteger obj, CyclicBarrier b) { |
398 |
this.iters = iters; |
399 |
this.obj = obj; |
400 |
this.barrier = b; |
401 |
obj.set(rng.next()); |
402 |
} |
403 |
|
404 |
public void run() { |
405 |
try { |
406 |
barrier.await(); |
407 |
long i = iters; |
408 |
int y = 0; |
409 |
int succ = 0; |
410 |
while (i > 0) { |
411 |
for (int k = 0; k < innerPerOuter; ++k) { |
412 |
int x = obj.get(); |
413 |
int z = y + LoopHelpers.compute6(x); |
414 |
if (obj.compareAndSet(x, z)) |
415 |
++succ; |
416 |
y = LoopHelpers.compute7(z); |
417 |
} |
418 |
i -= innerPerOuter; |
419 |
} |
420 |
sum.getAndAdd(obj.get()); |
421 |
successes.getAndAdd(succ); |
422 |
barrier.await(); |
423 |
} |
424 |
catch (Exception ie) { |
425 |
return; |
426 |
} |
427 |
} |
428 |
} |
429 |
|
430 |
static final int loopsPerTimeCheck = 2048; |
431 |
|
432 |
static final class NACalibrationLoop implements Runnable { |
433 |
final long endTime; |
434 |
final NonAtomicInteger obj; |
435 |
final CyclicBarrier barrier; |
436 |
NACalibrationLoop(long endTime, NonAtomicInteger obj, CyclicBarrier b) { |
437 |
this.endTime = endTime; |
438 |
this.obj = obj; |
439 |
this.barrier = b; |
440 |
obj.set(rng.next()); |
441 |
} |
442 |
|
443 |
public void run() { |
444 |
try { |
445 |
barrier.await(); |
446 |
long iters = 0; |
447 |
int y = 0; |
448 |
int succ = 0; |
449 |
do { |
450 |
int i = loopsPerTimeCheck; |
451 |
while (i > 0) { |
452 |
for (int k = 0; k < innerPerOuter; ++k) { |
453 |
int x = obj.get(); |
454 |
int z = y + LoopHelpers.compute6(x); |
455 |
if (obj.compareAndSet(x, z)) |
456 |
++succ; |
457 |
y = LoopHelpers.compute7(z); |
458 |
} |
459 |
i -= innerPerOuter; |
460 |
} |
461 |
iters += loopsPerTimeCheck; |
462 |
} while (System.currentTimeMillis() < endTime); |
463 |
totalIters.getAndAdd(iters); |
464 |
sum.getAndAdd(obj.get()); |
465 |
successes.getAndAdd(succ); |
466 |
barrier.await(); |
467 |
} |
468 |
catch (Exception ie) { |
469 |
return; |
470 |
} |
471 |
} |
472 |
} |
473 |
|
474 |
static void runCalibration(int n, long nms) throws Exception { |
475 |
long now = System.currentTimeMillis(); |
476 |
long endTime = now + nms; |
477 |
CyclicBarrier b = new CyclicBarrier(n+1); |
478 |
totalIters.set(0); |
479 |
NonAtomicInteger a = new NonAtomicInteger(); |
480 |
for (int j = 0; j < n; ++j) |
481 |
new Thread(new NACalibrationLoop(endTime, a, b)).start(); |
482 |
b.await(); |
483 |
b.await(); |
484 |
long ipt = totalIters.get() / n; |
485 |
if (ipt > loopIters[n]) |
486 |
loopIters[n] = ipt; |
487 |
if (sum.get() == 0) System.out.print(" "); |
488 |
} |
489 |
|
490 |
static long runNonAtomic(int n, long iters) throws Exception { |
491 |
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
492 |
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
493 |
NonAtomicInteger a = new NonAtomicInteger(); |
494 |
for (int j = 0; j < n; ++j) |
495 |
new Thread(new NonAtomicLoop(iters, a, b)).start(); |
496 |
b.await(); |
497 |
b.await(); |
498 |
if (sum.get() == 0) System.out.print(" "); |
499 |
return timer.getTime(); |
500 |
} |
501 |
|
502 |
static long runUpdaterAtomic(int n, long iters) throws Exception { |
503 |
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
504 |
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
505 |
UpdaterAtomicInteger a = new UpdaterAtomicInteger(); |
506 |
for (int j = 0; j < n; ++j) |
507 |
new Thread(new UpdaterAtomicLoop(iters, a, b)).start(); |
508 |
b.await(); |
509 |
b.await(); |
510 |
if (sum.get() == 0) System.out.print(" "); |
511 |
return timer.getTime(); |
512 |
} |
513 |
|
514 |
static long runAtomic(int n, long iters) throws Exception { |
515 |
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
516 |
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
517 |
AtomicInteger a = new AtomicInteger(); |
518 |
for (int j = 0; j < n; ++j) |
519 |
new Thread(new AtomicLoop(iters, a, b)).start(); |
520 |
b.await(); |
521 |
b.await(); |
522 |
if (sum.get() == 0) System.out.print(" "); |
523 |
return timer.getTime(); |
524 |
} |
525 |
|
526 |
static long runVolatile(int n, long iters) throws Exception { |
527 |
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
528 |
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
529 |
VolatileInteger a = new VolatileInteger(); |
530 |
for (int j = 0; j < n; ++j) |
531 |
new Thread(new VolatileLoop(iters, a, b)).start(); |
532 |
b.await(); |
533 |
b.await(); |
534 |
if (sum.get() == 0) System.out.print(" "); |
535 |
return timer.getTime(); |
536 |
} |
537 |
|
538 |
|
539 |
static long runSynched(int n, long iters) throws Exception { |
540 |
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
541 |
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
542 |
SynchedInteger a = new SynchedInteger(); |
543 |
for (int j = 0; j < n; ++j) |
544 |
new Thread(new SynchedLoop(iters, a, b)).start(); |
545 |
b.await(); |
546 |
b.await(); |
547 |
if (sum.get() == 0) System.out.print(" "); |
548 |
return timer.getTime(); |
549 |
} |
550 |
|
551 |
static long runLocked(int n, long iters) throws Exception { |
552 |
LoopHelpers.BarrierTimer timer = new LoopHelpers.BarrierTimer(); |
553 |
CyclicBarrier b = new CyclicBarrier(n+1, timer); |
554 |
LockedInteger a = new LockedInteger(); |
555 |
for (int j = 0; j < n; ++j) |
556 |
new Thread(new LockedLoop(iters, a, b)).start(); |
557 |
b.await(); |
558 |
b.await(); |
559 |
if (sum.get() == 0) System.out.print(" "); |
560 |
return timer.getTime(); |
561 |
} |
562 |
|
563 |
static void report(String tag, long runtime, long basetime, |
564 |
int nthreads, long iters) { |
565 |
System.out.print(tag); |
566 |
long t = (runtime - basetime) / iters; |
567 |
if (nthreads > NCPUS) |
568 |
t = t * NCPUS / nthreads; |
569 |
System.out.print(LoopHelpers.rightJustify(t)); |
570 |
double secs = (double)(runtime) / 1000000000.0; |
571 |
System.out.println("\t " + secs + "s run time"); |
572 |
} |
573 |
|
574 |
|
575 |
static void oneRun(int i, long iters, boolean print) throws Exception { |
576 |
if (print) |
577 |
System.out.println("threads : " + i + |
578 |
" base iters per thread per run : " + |
579 |
LoopHelpers.rightJustify(loopIters[i])); |
580 |
long ntime = runNonAtomic(i, iters); |
581 |
if (print) |
582 |
report("Base : ", ntime, ntime, i, iters); |
583 |
Thread.sleep(100L); |
584 |
long atime = runAtomic(i, iters); |
585 |
if (print) |
586 |
report("Atomic CAS : ", atime, ntime, i, iters); |
587 |
Thread.sleep(100L); |
588 |
long gtime = runUpdaterAtomic(i, iters); |
589 |
if (print) |
590 |
report("Updater CAS : ", gtime, ntime, i, iters); |
591 |
Thread.sleep(100L); |
592 |
long vtime = runVolatile(i, iters); |
593 |
if (print) |
594 |
report("Volatile : ", vtime, ntime, i, iters); |
595 |
|
596 |
Thread.sleep(100L); |
597 |
if (!includeLocks) return; |
598 |
long mtime = runLocked(i, iters); |
599 |
if (print) |
600 |
report("Mutex : ", mtime, ntime, i, iters); |
601 |
Thread.sleep(100L); |
602 |
long stime = runSynched(i, iters); |
603 |
if (print) |
604 |
report("Synchronized: ", stime, ntime, i, iters); |
605 |
Thread.sleep(100L); |
606 |
} |
607 |
|
608 |
|
609 |
} |