/*
 * Written by Doug Lea with assistance from members of JCP JSR-166
 * Expert Group and released to the public domain, as explained at
 * http://creativecommons.org/publicdomain/zero/1.0/
 */

import java.util.concurrent.*;
import java.util.*;

class CCCSort {
    static final long NPS = (1000L * 1000 * 1000);
    static int THRESHOLD;
    static Long[] numbers;

    public static void main(String[] args) throws Exception {
        int n = 1 << 22;
        int reps = 30;
        int sreps = 2;
        try {
            if (args.length > 0)
                n = Integer.parseInt(args[0]);
            if (args.length > 1)
                reps = Integer.parseInt(args[1]);
        }
        catch (Exception e) {
            System.out.println("Usage: java CCC n reps");
            return;
        }

        numbers = new Long[n];
        for (int i = 0; i < n; ++i)
            numbers[i] = Long.valueOf(i);

        int thr = ((n + 7) >>> 3) / ForkJoinPool.getCommonPoolParallelism();
        THRESHOLD = (thr <= 1 << 13)? 1 << 13 : thr;

        System.out.println("Threshold = " + THRESHOLD);

        Long[] a = new Long[n];
        ForkJoinPool pool = ForkJoinPool.commonPool();
        seqTest(a, n, 1);
        System.out.println(pool);
        parTest(a, n, reps);
        System.out.println(pool);
        seqTest(a, n, 2);
        System.out.println(pool);
    }

    static void seqTest(Long[] a, int n, int reps) {
        System.out.printf("Sorting %d longs, %d replications\n", n, reps);
        long start = System.nanoTime();
        for (int i = 0; i < reps; ++i) {
            new RandomRepacker(null, numbers, a, 0, n, n).invoke();
            long last = System.nanoTime();
            java.util.Arrays.sort(a);
            long now = System.nanoTime();
            double total = (double)(now - start) / NPS;
            double elapsed = (double)(now - last) / NPS;
            System.out.printf("Arrays.sort   time:  %7.3f total %9.3f\n",
                              elapsed, total);
            new OrderChecker(null, a, 0, n, n).invoke();
        }
    }

    static void parTest(Long[] a, int n, int reps) throws Exception {
        System.out.printf("Sorting %d longs, %d replications\n", n, reps);
        long start = System.nanoTime();
        for (int i = 0; i < reps; ++i) {
            new RandomRepacker(null, numbers, a, 0, n, n).invoke();
            long last = System.nanoTime();
            parallelSort(a, 0, n);
            long now = System.nanoTime();
            double total = (double)(now - start) / NPS;
            double elapsed = (double)(now - last) / NPS;
            System.out.printf("Parallel sort time:  %7.3f total %9.3f\n",
                              elapsed, total);
            new OrderChecker(null, a, 0, n, n).invoke();
        }
    }

    /**
     * The minimum array/subarray size to sort reference arrays in
     * parallel.  Using smaller sizes may generate excessive cache
     * contention.
     */
    static final int MIN_PARALLEL_SORT_UNIT = 1 << 13;

    public static <T extends Comparable<? super T>>
                             void parallelSort(T[] a, int fromIndex, int toIndex) {
        //        checkFromToBounds(a.length, fromIndex, toIndex);
        int n = toIndex - fromIndex;
        if (n <= MIN_PARALLEL_SORT_UNIT) {
            Arrays.sort(a, fromIndex, toIndex);
            return;
        }
        Class<?> tc = a.getClass().getComponentType();
        @SuppressWarnings("unchecked")
            T[] ws = (T[])java.lang.reflect.Array.newInstance(tc, a.length);
        int g = n / (ForkJoinPool.getCommonPoolParallelism() << 3);
        int gran = (g <= MIN_PARALLEL_SORT_UNIT) ? MIN_PARALLEL_SORT_UNIT : g;
        new Sorter<>(null, a, ws, fromIndex, toIndex, 0, gran).invoke();
    }

    /*
     * Merge sort alternates placing elements in the given array vs
     * the workspace array. To make sure the final elements are in the
     * given array, we descend in double steps.  So we need some
     * little tasks to serve as the place holders for triggering the
     * merges and re-merges. These don't need to keep track of the
     * arrays, and are never themselves forked, so are mostly empty.
     */
    static final class EmptyStage extends CountedCompleter<Void> {
        EmptyStage(CountedCompleter<?> p) { super(p); }
        public final void compute() { }
    }

    static final class Relay extends CountedCompleter<Void> {
        final CountedCompleter<?> task;
        Relay(CountedCompleter<?> task) {
            super(null, 1);
            this.task = task;
        }
        public final void compute() { }
        public final void onCompletion(CountedCompleter<?> t) {
            task.compute();
        }
    }

    static final class Sorter<T extends Comparable<? super T>> 
        extends CountedCompleter<Void> {
        final T[] a, w;
        final int base, size, wbase, gran;
        Sorter(CountedCompleter<?> par, T[] a, T[] w, int base, int size, 
               int wbase, int gran) {
            super(par);
            this.a = a; this.w = w; this.base = base; this.size = size; 
            this.wbase = wbase; this.gran = gran;
        }
        public final void compute() {
            CountedCompleter<?> s = this;
            T[] a = this.a, w = this.w; // localize all params
            int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
            while (n > g) {
                int h = n >>> 1, q = n >>> 2, u = h + q; // quartiles
                int qb = b + q, hb = b + h, ub = b + u;  // bases
                int wqb = wb + q, whb = wb + h, wub = wb + u;
                int hq = h - q, hn = n - h, un = n - u;  // sizes
                n = q;
                Relay fc = 
                    new Relay(new Merger<>(s,  w, a, wb,  h, whb, hn, b,  g));
                Relay rc = 
                    new Relay(new Merger<>(fc, a, w, hb, q, ub, un, whb, g));
                Relay bc = 
                    new Relay(new Merger<>(fc, a, w, b,  q, qb, hq, wb,  g));
                s = new EmptyStage(bc);
                Sorter su = new Sorter<>(rc, a, w, ub, un, wub, g);
                Sorter sh = new Sorter<>(rc, a, w, hb, q,  whb, g);
                Sorter sq = new Sorter<>(bc, a, w, qb, hq, wqb, g);
                su.fork();
                sh.fork();
                sq.fork();
            }
            CTS.sort(a, b, b + n, w, wb, n);
            s.tryComplete();
        }
    }

    static final class Merger<T extends Comparable<? super T>> 
        extends CountedCompleter<Void> {
        final T[] a, w; // main and workspace arrays
        final int lbase, lsize, rbase, rsize, wbase, gran;
        Merger(CountedCompleter<?> par, T[] a, T[] w, 
               int lbase, int lsize, int rbase, 
               int rsize, int wbase, int gran) {
            super(par);
            this.a = a; this.w = w;
            this.lbase = lbase; this.lsize = lsize;
            this.rbase = rbase; this.rsize = rsize;
            this.wbase = wbase; this.gran = gran;
        }
        
        /**
         * Merge left and right by splitting left in half, and finding
         * index of right closest to split point.  On each split,
         * ensure that indices correspond to the beginnings of
         * runs. Stop when either side of split becomes too small.
         */
        public final void compute() {
            T[] a = this.a, w = this.w; // localize all params
            int lb = this.lbase, ln = this.lsize, rb = this.rbase, 
                rn = this.rsize, k = this.wbase, g = this.gran;
            while (ln > g) {
                int lh = ln >>> 1;
                T split = a[lb + lh];
                int qg = (g >>> 2) + 1;
                while (lh > qg && split.compareTo(a[lb + lh - 1]) == 0)
                    --lh; // back up to beginning of run
                if (lh <= qg)
                    break;
                int rh = rn;
                for (int rl = 0; rl < rh;) {
                    int rm = (rl + rh) >>> 1;
                    if (split.compareTo(a[rb + rm]) <= 0)
                        rh = rm;
                    else
                        rl = rm + 1;
                }
                addToPendingCount(1);
                new Merger<>(this, a, w, lb + lh, ln - lh, rb + rh, rn - rh, 
                             k + lh + rh, g).fork();
                rn = rh;
                ln = lh;
            }
            
            int lf = lb + ln, rf = rb + rn; // index bounds
            while (lb < lf && rb < rf) {
                T t, al, ar;
                if ((al = a[lb]).compareTo(ar = a[rb]) <= 0) {
                    lb++; t = al;
                }
                else {
                    rb++; t = ar; 
                }
                w[k++] = t;
            }
            if (rb < rf)
                System.arraycopy(a, rb, w, k, rf - rb);
            else if (lb < lf)
                System.arraycopy(a, lb, w, k, lf - lb);
            
            tryComplete();
        }

        public final void zcompute() {
            T[] a = this.a, w = this.w; // localize all params
            int lb = this.lbase, ln = this.lsize, rb = this.rbase, 
                rn = this.rsize, k = this.wbase, g = this.gran;
            while (ln > g) {
                int lh = ln >>> 1, hg = g >>> 1;
                T split = a[lb + lh];
                while (lh > hg && split.compareTo(a[lb + lh - 1]) == 0)
                    --lh; // back up left
                if (lh <= hg)
                    break;
                int rh = rn;
                for (int rl = 0; rl < rh;) {
                    int rm = (rl + rh) >>> 1;
                    if (split.compareTo(a[rb + rm]) <= 0)
                        rh = rm;
                    else
                        rl = rm + 1;
                }
                addToPendingCount(1);
                new Merger<>(this, a, w, lb + lh, ln - lh, rb + rh, rn - rh, 
                             k + lh + rh, g).fork();
                rn = rh;
                ln = lh;
            }
            
            int lf = lb + ln, rf = rb + rn; // index bounds
            while (lb < lf && rb < rf) {
                T t, al, ar;
                if ((al = a[lb]).compareTo(ar = a[rb]) <= 0) {
                    lb++; t = al;
                }
                else {
                    rb++; t = ar; 
                }
                w[k++] = t;
            }
            if (rb < rf)
                System.arraycopy(a, rb, w, k, rf - rb);
            else if (lb < lf)
                System.arraycopy(a, lb, w, k, lf - lb);
            
            tryComplete();
        }

        public final void xxcompute() {
            T[] a = this.a, w = this.w; // localize all params
            int lb = this.lbase, ln = this.lsize, rb = this.rbase, 
                rn = this.rsize, k = this.wbase, g = this.gran;
            for (;;) {
                int lh = ln >>> 1;
                T split = a[lb + lh];
                int hg = g >>> 1;
                while (lh > hg && split.compareTo(a[lb + lh - 1]) == 0)
                    --lh; // back up left
                if (lh <= hg)
                    break;
                int rh = rn;
                for (int rl = 0; rl < rh;) {
                    int rm = (rl + rh) >>> 1;
                    if (split.compareTo(a[rb + rm]) <= 0)
                        rh = rm;
                    else
                        rl = rm + 1;
                }
                int s, sl, sr;
                int eg = g >>> 3;
                if (rh <= eg || (sr = rn - rh) <= eg ||
                    (s = lh + rh) <= g || (sl = ln - lh) + sr <= g)
                    break;
                addToPendingCount(1);
                new Merger<>(this, a, w, lb + lh, sl, rb + rh, sr, k + s, 
                             g).fork();
                rn = rh;
                ln = lh;
            }
            
            int lf = lb + ln, rf = rb + rn; // index bounds
            while (lb < lf && rb < rf) {
                T t, al, ar;
                if ((al = a[lb]).compareTo(ar = a[rb]) <= 0) {
                    lb++; t = al;
                }
                else {
                    rb++; t = ar; 
                }
                w[k++] = t;
            }
            if (rb < rf)
                System.arraycopy(a, rb, w, k, rf - rb);
            else if (lb < lf)
                System.arraycopy(a, lb, w, k, lf - lb);
            
            tryComplete();
        }

        public final void xcompute() {
            T[] a = this.a, w = this.w; // localize all params
            int lb = this.lbase, ln = this.lsize, rb = this.rbase, 
                rn = this.rsize, k = this.wbase, g = this.gran;
            outer: for (;;) {
                int lh = ln >>> 1;
                T split = a[lb + lh];
                int rh = rn;
                for (int rl = 0; rl < rh;) {
                    int rm = (rl + rh) >>> 1;
                    if (split.compareTo(a[rb + rm]) <= 0)
                        rh = rm;
                    else
                        rl = rm + 1;
                }
                if (lh + rh <= g || (ln - lh) + (rn - rh) <= g) // too small
                    break;
                int eighth = g >>> 3;
                if ((rn - rh) <= eighth || rh <= eighth) // too imbalanced
                    break;
                while (split.compareTo(a[rb + rh - 1]) == 0) // back up right
                    if (--rh <= eighth)
                        break outer;
                for (;;) { // back up left
                    if (lh <= eighth)
                        break outer;
                    if (split.compareTo(a[lb + lh - 1]) != 0)
                        break;
                    --lh;
                }
                addToPendingCount(1);
                new Merger<>(this, a, w, lb + lh, ln - lh, 
                             rb + rh, rn - rh, k + lh + rh, g).fork();
                rn = rh;
                ln = lh;
            }
            
            int lf = lb + ln, rf = rb + rn; // index bounds
            while (lb < lf && rb < rf) {
                T t, al, ar;
                if ((al = a[lb]).compareTo(ar = a[rb]) <= 0) {
                    lb++; t = al;
                }
                else {
                    rb++; t = ar; 
                }
                w[k++] = t;
            }
            if (rb < rf)
                System.arraycopy(a, rb, w, k, rf - rb);
            else if (lb < lf)
                System.arraycopy(a, lb, w, k, lf - lb);
            
            tryComplete();
        }

    }


    static void checkSorted(Long[] a) {
        int n = a.length;
        long x = ((Long)a[0]).longValue(), y;
        for (int i = 0; i < n - 1; i++) {
            if (x > (y = ((Long)a[i+1]).longValue()))
                throw new Error("Unsorted at " + i + ": " + x + " / " + y);
            x = y;
        }
    }

    static final class RandomRepacker extends CountedCompleter<Void> {
        final Long[] src;
        final Long[] dst;
        final int lo, hi, size;
        RandomRepacker(CountedCompleter<?> par, Long[] src, Long[] dst,
                       int lo, int hi, int size) {
            super(par);
            this.src = src; this.dst = dst;
            this.lo = lo; this.hi = hi; this.size = size;
        }

        public final void compute() {
            Long[] s = src;
            Long[] d = dst;
            int l = lo, h = hi, n = size;
            while (h - l > THRESHOLD << 1) {
                int m = (l + h) >>> 1;
                addToPendingCount(1);
                new RandomRepacker(this, s, d, m, h, n).fork();
                h = m;
            }
            ThreadLocalRandom rng = ThreadLocalRandom.current();
            Long dl = d[l];
            int m = (dl == null)? h : rng.nextInt(l, h);
            for (int i = l; i < m; ++i)
                d[i] = s[rng.nextInt(n)];
            if (dl != null) {
                dl = d[l];
                for (int i = m; i < h; ++i)
                    d[i] = dl;
            }
            tryComplete();
        }
    }

    static final class OrderChecker extends CountedCompleter<Void> {
        final Long[] array;
        final int lo, hi, size;
        OrderChecker(CountedCompleter<?> par, Long[] a, int lo, int hi, int size) {
            super(par);
            this.array = a;
            this.lo = lo; this.hi = hi; this.size = size;
        }

        public final void compute() {
            Long[] a = this.array;
            int l = lo, h = hi, n = size;
            while (h - l > THRESHOLD) {
                int m = (l + h) >>> 1;
                addToPendingCount(1);
                new OrderChecker(this, a, m, h, n).fork();
                h = m;
            }
            int bound = h < n ? h : n - 1;
            int i = l;
            long x = ((Long)a[i]).longValue(), y;
            while (i < bound) {
                if (x > (y = ((Long)a[++i]).longValue()))
                    throw new Error("Unsorted " + x + " / " + y);
                x = y;
            }
            tryComplete();
        }
    }

}
