Search in sources :

Example 1 with RecursiveAction

use of jsr166y.RecursiveAction in project h2o-3 by h2oai.

the class Gram method cholesky.

/**
   * Compute the Cholesky decomposition.
   *
   * In case our gram starts with diagonal submatrix of dimension N, we exploit this fact to reduce the complexity of the problem.
   * We use the standard decomposition of the Cholesky factorization into submatrices.
   *
   * We split the Gram into 3 regions (4 but we only consider lower diagonal, sparse means diagonal region in this context):
   *     diagonal
   *     diagonal*dense
   *     dense*dense
   * Then we can solve the Cholesky in 3 steps:
   *  1. We solve the diagonal part right away (just do the sqrt of the elements).
   *  2. The diagonal*dense part is simply divided by the sqrt of diagonal.
   *  3. Compute Cholesky of dense*dense - outer product of Cholesky of diagonal*dense computed in previous step
   *
   * @param chol
   * @return the Cholesky decomposition
   */
public Cholesky cholesky(Cholesky chol, boolean parallelize, String id) {
    long start = System.currentTimeMillis();
    if (chol == null) {
        double[][] xx = _xx.clone();
        for (int i = 0; i < xx.length; ++i) xx[i] = xx[i].clone();
        chol = new Cholesky(xx, _diag.clone());
    }
    final Cholesky fchol = chol;
    final int sparseN = _diag.length;
    final int denseN = _fullN - sparseN;
    // compute the cholesky of the diagonal and diagonal*dense parts
    if (_diag != null)
        for (int i = 0; i < sparseN; ++i) {
            double d = 1.0 / (chol._diag[i] = Math.sqrt(_diag[i]));
            for (int j = 0; j < denseN; ++j) chol._xx[j][i] = d * _xx[j][i];
        }
    ForkJoinTask[] fjts = new ForkJoinTask[denseN];
    // compute the outer product of diagonal*dense
    //Log.info("SPARSEN = " + sparseN + "    DENSEN = " + denseN);
    final int[][] nz = new int[denseN][];
    for (int i = 0; i < denseN; ++i) {
        final int fi = i;
        fjts[i] = new RecursiveAction() {

            @Override
            protected void compute() {
                int[] tmp = new int[sparseN];
                double[] rowi = fchol._xx[fi];
                int n = 0;
                for (int k = 0; k < sparseN; ++k) if (rowi[k] != .0)
                    tmp[n++] = k;
                nz[fi] = Arrays.copyOf(tmp, n);
            }
        };
    }
    ForkJoinTask.invokeAll(fjts);
    for (int i = 0; i < denseN; ++i) {
        final int fi = i;
        fjts[i] = new RecursiveAction() {

            @Override
            protected void compute() {
                double[] rowi = fchol._xx[fi];
                int[] nzi = nz[fi];
                for (int j = 0; j <= fi; ++j) {
                    double[] rowj = fchol._xx[j];
                    int[] nzj = nz[j];
                    double s = 0;
                    for (int t = 0, z = 0; t < nzi.length && z < nzj.length; ) {
                        int k1 = nzi[t];
                        int k2 = nzj[z];
                        if (k1 < k2) {
                            t++;
                            continue;
                        } else if (k1 > k2) {
                            z++;
                            continue;
                        } else {
                            s += rowi[k1] * rowj[k1];
                            t++;
                            z++;
                        }
                    }
                    rowi[j + sparseN] = _xx[fi][j + sparseN] - s;
                }
            }
        };
    }
    ForkJoinTask.invokeAll(fjts);
    // compute the cholesky of dense*dense-outer_product(diagonal*dense)
    double[][] arr = new double[denseN][];
    for (int i = 0; i < arr.length; ++i) arr[i] = Arrays.copyOfRange(fchol._xx[i], sparseN, sparseN + denseN);
    int p = Runtime.getRuntime().availableProcessors();
    InPlaceCholesky d = InPlaceCholesky.decompose_2(arr, 10, p);
    fchol.setSPD(d.isSPD());
    arr = d.getL();
    for (int i = 0; i < arr.length; ++i) System.arraycopy(arr[i], 0, fchol._xx[i], sparseN, i + 1);
    return chol;
}
Also used : RecursiveAction(jsr166y.RecursiveAction) ForkJoinTask(jsr166y.ForkJoinTask)

Example 2 with RecursiveAction

use of jsr166y.RecursiveAction in project h2o-2 by h2oai.

the class DataAdapter method shrink.

public void shrink() {
    if (_jobKey != null && !Job.isRunning(_jobKey))
        throw new Job.JobCancelledException();
    //    for ( Col c: _c) c.shrink();
    // sort columns in parallel: c.shrink() calls single-threaded Arrays.sort()
    RecursiveAction[] ras = new RecursiveAction[_c.length];
    int i = 0;
    for (final Col c : _c) {
        ras[i++] = new RecursiveAction() {

            @Override
            public void compute() {
                c.shrink();
            }
        };
    }
    ForkJoinTask.invokeAll(ras);
}
Also used : RecursiveAction(jsr166y.RecursiveAction)

Example 3 with RecursiveAction

use of jsr166y.RecursiveAction in project h2o-2 by h2oai.

the class Gram method cholesky.

/**
   * Compute the cholesky decomposition.
   *
   * In case our gram starts with diagonal submatrix of dimension N, we exploit this fact to reduce the complexity of the problem.
   * We use the standard decomposition of the cholesky factorization into submatrices.
   *
   * We split the Gram into 3 regions (4 but we only consider lower diagonal, sparse means diagonal region in this context):
   *     diagonal
   *     diagonal*dense
   *     dense*dense
   * Then we can solve the cholesky in 3 steps:
   *  1. We solve the diagnonal part right away (just do the sqrt of the elements).
   *  2. The diagonal*dense part is simply divided by the sqrt of diagonal.
   *  3. Compute Cholesky of dense*dense - outer product of cholesky of diagonal*dense computed in previous step
   *
   * @param chol
   * @return
   */
public Cholesky cholesky(Cholesky chol, boolean parallelize, String id) {
    long start = System.currentTimeMillis();
    if (chol == null) {
        double[][] xx = _xx.clone();
        for (int i = 0; i < xx.length; ++i) xx[i] = xx[i].clone();
        chol = new Cholesky(xx, _diag.clone());
    }
    final Cholesky fchol = chol;
    final int sparseN = _diag.length;
    final int denseN = _fullN - sparseN;
    // compute the cholesky of the diagonal and diagonal*dense parts
    if (_diag != null)
        for (int i = 0; i < sparseN; ++i) {
            double d = 1.0 / (chol._diag[i] = Math.sqrt(_diag[i]));
            for (int j = 0; j < denseN; ++j) chol._xx[j][i] = d * _xx[j][i];
        }
    ForkJoinTask[] fjts = new ForkJoinTask[denseN];
    // compute the outer product of diagonal*dense
    //Log.info("SPARSEN = " + sparseN + "    DENSEN = " + denseN);
    final int[][] nz = new int[denseN][];
    for (int i = 0; i < denseN; ++i) {
        final int fi = i;
        fjts[i] = new RecursiveAction() {

            @Override
            protected void compute() {
                int[] tmp = new int[sparseN];
                double[] rowi = fchol._xx[fi];
                int n = 0;
                for (int k = 0; k < sparseN; ++k) if (rowi[k] != .0)
                    tmp[n++] = k;
                nz[fi] = Arrays.copyOf(tmp, n);
            }
        };
    }
    ForkJoinTask.invokeAll(fjts);
    for (int i = 0; i < denseN; ++i) {
        final int fi = i;
        fjts[i] = new RecursiveAction() {

            @Override
            protected void compute() {
                double[] rowi = fchol._xx[fi];
                int[] nzi = nz[fi];
                for (int j = 0; j <= fi; ++j) {
                    double[] rowj = fchol._xx[j];
                    int[] nzj = nz[j];
                    double s = 0;
                    for (int t = 0, z = 0; t < nzi.length && z < nzj.length; ) {
                        int k1 = nzi[t];
                        int k2 = nzj[z];
                        if (k1 < k2) {
                            t++;
                            continue;
                        } else if (k1 > k2) {
                            z++;
                            continue;
                        } else {
                            s += rowi[k1] * rowj[k1];
                            t++;
                            z++;
                        }
                    }
                    rowi[j + sparseN] = _xx[fi][j + sparseN] - s;
                }
            }
        };
    }
    ForkJoinTask.invokeAll(fjts);
    // compute the cholesky of dense*dense-outer_product(diagonal*dense)
    // TODO we still use Jama, which requires (among other things) copy and expansion of the matrix. Do it here without copy and faster.
    double[][] arr = new double[denseN][];
    for (int i = 0; i < arr.length; ++i) arr[i] = Arrays.copyOfRange(fchol._xx[i], sparseN, sparseN + denseN);
    //    Log.info(id + ": CHOLESKY PRECOMPUTE TIME " + (System.currentTimeMillis() - start));
    start = System.currentTimeMillis();
    // parallelize cholesky
    if (parallelize) {
        int p = Runtime.getRuntime().availableProcessors();
        InPlaceCholesky d = InPlaceCholesky.decompose_2(arr, 10, p);
        fchol.setSPD(d.isSPD());
        arr = d.getL();
    //      Log.info (id + ": H2O CHOLESKY DECOMP TAKES: " + (System.currentTimeMillis()-start));
    } else {
        // make it symmetric
        for (int i = 0; i < arr.length; ++i) for (int j = 0; j < i; ++j) arr[j][i] = arr[i][j];
        CholeskyDecomposition c = new Matrix(arr).chol();
        fchol.setSPD(c.isSPD());
        arr = c.getL().getArray();
    //Log.info ("JAMA CHOLESKY DECOMPOSE TAKES: " + (System.currentTimeMillis()-start));
    }
    for (int i = 0; i < arr.length; ++i) System.arraycopy(arr[i], 0, fchol._xx[i], sparseN, i + 1);
    return chol;
}
Also used : CholeskyDecomposition(Jama.CholeskyDecomposition) Matrix(Jama.Matrix) RecursiveAction(jsr166y.RecursiveAction) ForkJoinTask(jsr166y.ForkJoinTask)

Example 4 with RecursiveAction

use of jsr166y.RecursiveAction in project h2o-3 by h2oai.

the class Gram method qrCholesky.

/**
   * Compute Cholesky decompostion by computing partial QR decomposition (R == LU).
   *
   * The advantage of this method over the standard solve is that it can deal with Non-SPD matrices.
   * Gram matrix comes out as Non-SPD if we have collinear columns.
   * QR decomposition can identify collinear (redundant) columns and remove them from the dataset.
   *
   * QR computation:
   * QR is computed using Gram-Schmidt elimination, using Gram matrix instead of the underlying dataset.
   *
   * Gram-schmidt decomposition can be computed as follows: (from "The Eelements of Statistical Learning")
   * 1. set z0 = x0 = 1 (Intercept)
   * 2. for j = 1:p
   *      for l = 1:j-1
   *        gamma_jl = dot(x_l,x_j)/dot(x_l,x_l)
   *      zj = xj - sum(gamma_j[l]*x_l)
   *      if(zj ~= 0) xj was redundant (collinear)
   * Zjs are orthogonal projections of xk and form base of the X space. (dot(z_i,z_j) == 0 for i != j)
   * In the end, gammas contain (Scaled) R from the QR decomp which is == LU from cholesky decomp.
   *
   *
   * Note that all of these operations can be be computed from the Gram matrix only, as gram matrix contains
   * dot(x_i,x_j) for i = 1..N, j = 1..N
   *
   * We can obviously compute gamma_lk directly, instead of replacing xk with zk, we fix the gram matrix.
   * When doing that, we need to replace dot(xi,xk) with dot(xi,zk) for all i.
   * There are two cases,
   *   1)  dot(xk,xk) -> dot(zk,zk)
   *       dot(xk - sum(gamma_l*x_l,xk - sum(gamma_l*x_l)
   *       = dot(xk,xk) - 2* sum(gamma_l*dot(x_i,x_k) + sum(gamma_l*sum(gamma_k*dot(x_l,x_k)))
   *       (can be simplified using the fact that dot(zi,zj) == 0 for i != j
   *   2)  dot(xi,xk) -> dot(xi,zk) for i != k
   *      dot(xi, xj - sum(gamma_l*x_l))
   *      = dot(xi, xj) - dot(xi,sum(gamma_l*x_l))
   *      = dot(xi,xj) - sum(gamma_l*dot(xi,x_l)) (linear combination
   *
   * The algorithm then goes as follows:
   *   for j = 1:n
   *     for l = 1:j-1
   *       compute gamma_jl
   *     update gram by replacing xk with zk = xk- sum(gamma_jl*s*xl);
   *
   * @param dropped_cols - empty list which will be filled with collinear columns removed during computation
   * @return Cholesky - cholesky decomposition fo the gram
   */
public Cholesky qrCholesky(ArrayList<Integer> dropped_cols, boolean standardized) {
    final double[][] Z = getXX(true, true);
    final double[][] R = new double[Z.length][];
    final double[] Zdiag = new double[Z.length];
    final double[] ZdiagInv = new double[Z.length];
    for (int i = 0; i < Z.length; ++i) ZdiagInv[i] = 1.0 / (Zdiag[i] = Z[i][i]);
    for (int j = 0; j < Z.length; ++j) {
        final double[] gamma = R[j] = new double[j + 1];
        for (// compute gamma_l_j
        int l = 0; // compute gamma_l_j
        l <= j; // compute gamma_l_j
        ++l) gamma[l] = Z[j][l] * ZdiagInv[l];
        double zjj = Z[j][j];
        for (// only need the diagonal, the rest is 0 (dot product of orthogonal vectors)
        int k = 0; // only need the diagonal, the rest is 0 (dot product of orthogonal vectors)
        k < j; // only need the diagonal, the rest is 0 (dot product of orthogonal vectors)
        ++k) zjj += gamma[k] * (gamma[k] * Z[k][k] - 2 * Z[j][k]);
        // Check R^2 for the current column and ignore if too high (1-R^2 too low), R^2 = 1- rs_res/rs_tot
        // rs_res = zjj (the squared residual)
        // rs_tot = sum((yi - mean(y))^2) = mean(y^2) - mean(y)^2,
        //   mean(y^2) is on diagonal
        //   mean(y) is in the intercept (0 if standardized)
        //   might not be regularized with number of observations, that's why dividing by intercept diagonal
        double rs_tot = standardized ? ZdiagInv[j] : 1.0 / (Zdiag[j] - Z[j][0] * ZdiagInv[0] * Z[j][0]);
        if (j > 0 && zjj * rs_tot < r2_eps) {
            // collinear column, drop it!
            zjj = 0;
            dropped_cols.add(j - 1);
            ZdiagInv[j] = 0;
        } else
            ZdiagInv[j] = 1. / zjj;
        Z[j][j] = zjj;
        int jchunk = Math.max(1, MIN_PAR / (Z.length - j));
        int nchunks = (Z.length - j - 1) / jchunk;
        nchunks = Math.min(nchunks, H2O.NUMCPUS);
        if (nchunks <= 1) {
            // single trheaded update
            updateZ(gamma, Z, j);
        } else {
            // multi-threaded update
            final int fjchunk = (Z.length - 1 - j) / nchunks;
            int rem = Z.length - 1 - j - fjchunk * nchunks;
            for (int i = Z.length - rem; i < Z.length; ++i) updateZij(i, j, Z, gamma);
            RecursiveAction[] ras = new RecursiveAction[nchunks];
            final int fj = j;
            int k = 0;
            for (int i = j + 1; i < Z.length - rem; i += fjchunk) {
                // update xj to zj //
                final int fi = i;
                ras[k++] = new RecursiveAction() {

                    @Override
                    protected final void compute() {
                        int max_i = Math.min(fi + fjchunk, Z.length);
                        for (int i = fi; i < max_i; ++i) updateZij(i, fj, Z, gamma);
                    }
                };
            }
            ForkJoinTask.invokeAll(ras);
        }
    }
    // update the R - we computed Rt/sqrt(diag(Z)) which we can directly use to solve the problem
    if (R.length < 500)
        for (int i = 0; i < R.length; ++i) for (int j = 0; j <= i; ++j) R[i][j] *= Math.sqrt(Z[j][j]);
    else {
        RecursiveAction[] ras = new RecursiveAction[R.length];
        for (int i = 0; i < ras.length; ++i) {
            final int fi = i;
            final double[] Rrow = R[i];
            ras[i] = new RecursiveAction() {

                @Override
                protected void compute() {
                    for (int j = 0; j <= fi; ++j) Rrow[j] *= Math.sqrt(Z[j][j]);
                }
            };
        }
        ForkJoinTask.invokeAll(ras);
    }
    // drop the ignored cols
    if (dropped_cols.isEmpty())
        return new Cholesky(R, new double[0], true);
    double[][] Rnew = new double[R.length - dropped_cols.size()][];
    for (int i = 0; i < Rnew.length; ++i) Rnew[i] = new double[i + 1];
    int j = 0;
    for (int i = 0; i < R.length; ++i) {
        if (Z[i][i] == 0)
            continue;
        int k = 0;
        for (int l = 0; l <= i; ++l) {
            if (k < dropped_cols.size() && l == (dropped_cols.get(k) + 1)) {
                ++k;
                continue;
            }
            Rnew[j][l - k] = R[i][l];
        }
        ++j;
    }
    return new Cholesky(Rnew, new double[0], true);
}
Also used : RecursiveAction(jsr166y.RecursiveAction)

Example 5 with RecursiveAction

use of jsr166y.RecursiveAction in project h2o-3 by h2oai.

the class KVSpeedTest method testMillionInsertKeys.

@Test
@Ignore
public void testMillionInsertKeys() {
    final int PAR = 100;
    // PAR*NKEY = 100M keys
    final int NKEY = 100000;
    // PAR*WARMKEY = 1M keys
    final int WARMKEY = 10000;
    H2O.H2OCountedCompleter foo = H2O.submitTask(new H2O.H2OCountedCompleter() {

        final Key[][] keys = new Key[PAR][NKEY];

        final Value[][] vals = new Value[PAR][NKEY];

        @Override
        public void compute2() {
            long now, start = System.currentTimeMillis();
            ArrayList<RecursiveAction> rs = new ArrayList<>();
            // Make a zillion keys
            for (int i = 0; i < PAR; ++i) {
                final int fi = i;
                rs.add(new RecursiveAction() {

                    @Override
                    public void compute() {
                        // Make & insert Keys in parallel
                        for (int j = 0; j < NKEY; j++) keys[fi][j] = Key.make("Q" + (fi * NKEY + j));
                    }
                });
            }
            ForkJoinTask.invokeAll(rs);
            now = System.currentTimeMillis();
            System.out.println("create msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
            start = now;
            // Warmup hashmap
            for (int X = 0; X < 4; X++) {
                for (int i = 0; i < PAR; ++i) {
                    final int fi = i;
                    rs.add(new RecursiveAction() {

                        @Override
                        public void compute() {
                            // Make & insert Keys in parallel
                            for (int j = 0; j < WARMKEY; j++) {
                                Key k = keys[fi][j];
                                H2O.putIfMatch(k, vals[fi][j] = new Value(k, ""), null);
                            }
                        }
                    });
                }
                ForkJoinTask.invokeAll(rs);
                for (int i = 0; i < PAR; ++i) {
                    final int fi = i;
                    rs.set(fi, new RecursiveAction() {

                        @Override
                        public void compute() {
                            // Make & insert Keys in parallel
                            for (int j = 0; j < WARMKEY; j++) H2O.putIfMatch(keys[fi][j], null, vals[fi][j]);
                        }
                    });
                }
                ForkJoinTask.invokeAll(rs);
                now = System.currentTimeMillis();
                System.out.println("warmup msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / WARMKEY);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                }
                start = System.currentTimeMillis();
            }
            System.out.println("Starting insert work");
            // Make a zillion Values in parallel
            for (int i = 0; i < PAR; ++i) {
                final int fi = i;
                rs.add(new RecursiveAction() {

                    @Override
                    public void compute() {
                        // Make & insert Keys in parallel
                        for (int j = 0; j < NKEY; j++) vals[fi][j] = new Value(keys[fi][j], "");
                    }
                });
            }
            ForkJoinTask.invokeAll(rs);
            now = System.currentTimeMillis();
            System.out.println("Values msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
            start = now;
            // Insert a zillion keys in parallel
            for (int i = 0; i < PAR; ++i) {
                final int fi = i;
                rs.add(new RecursiveAction() {

                    @Override
                    public void compute() {
                        // Make & insert Keys in parallel
                        for (int j = 0; j < NKEY; j++) {
                            Key k = keys[fi][j];
                            H2O.putIfMatch(k, vals[fi][j], null);
                        }
                    }
                });
            }
            ForkJoinTask.invokeAll(rs);
            now = System.currentTimeMillis();
            System.out.println("insert msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
            start = now;
            // Now remove them all
            for (int i = 0; i < PAR; ++i) {
                final int fi = i;
                rs.set(fi, new RecursiveAction() {

                    @Override
                    public void compute() {
                        // Make & insert Keys in parallel
                        for (int j = 0; j < NKEY; j++) H2O.putIfMatch(keys[fi][j], null, vals[fi][j]);
                    }
                });
            }
            ForkJoinTask.invokeAll(rs);
            now = System.currentTimeMillis();
            System.out.println("remove msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
            start = now;
            tryComplete();
        }
    });
    foo.join();
    System.out.printf("STORE size = %d\n", H2O.STORE.size());
    System.out.printf("STORE raw array length = %d\n", H2O.STORE.raw_array().length);
}
Also used : ArrayList(java.util.ArrayList) RecursiveAction(jsr166y.RecursiveAction) PrettyPrint(water.util.PrettyPrint)

Aggregations

RecursiveAction (jsr166y.RecursiveAction)6 ArrayList (java.util.ArrayList)2 ForkJoinTask (jsr166y.ForkJoinTask)2 CholeskyDecomposition (Jama.CholeskyDecomposition)1 Matrix (Jama.Matrix)1 Job (water.Job)1 Timer (water.Timer)1 PrettyPrint (water.util.PrettyPrint)1