use of jsr166y.RecursiveAction in project h2o-3 by h2oai.
the class Gram method cholesky.
/**
* Compute the Cholesky decomposition.
*
* In case our gram starts with diagonal submatrix of dimension N, we exploit this fact to reduce the complexity of the problem.
* We use the standard decomposition of the Cholesky factorization into submatrices.
*
* We split the Gram into 3 regions (4 but we only consider lower diagonal, sparse means diagonal region in this context):
* diagonal
* diagonal*dense
* dense*dense
* Then we can solve the Cholesky in 3 steps:
* 1. We solve the diagonal part right away (just do the sqrt of the elements).
* 2. The diagonal*dense part is simply divided by the sqrt of diagonal.
* 3. Compute Cholesky of dense*dense - outer product of Cholesky of diagonal*dense computed in previous step
*
* @param chol
* @return the Cholesky decomposition
*/
public Cholesky cholesky(Cholesky chol, boolean parallelize, String id) {
long start = System.currentTimeMillis();
if (chol == null) {
double[][] xx = _xx.clone();
for (int i = 0; i < xx.length; ++i) xx[i] = xx[i].clone();
chol = new Cholesky(xx, _diag.clone());
}
final Cholesky fchol = chol;
final int sparseN = _diag.length;
final int denseN = _fullN - sparseN;
// compute the cholesky of the diagonal and diagonal*dense parts
if (_diag != null)
for (int i = 0; i < sparseN; ++i) {
double d = 1.0 / (chol._diag[i] = Math.sqrt(_diag[i]));
for (int j = 0; j < denseN; ++j) chol._xx[j][i] = d * _xx[j][i];
}
ForkJoinTask[] fjts = new ForkJoinTask[denseN];
// compute the outer product of diagonal*dense
//Log.info("SPARSEN = " + sparseN + " DENSEN = " + denseN);
final int[][] nz = new int[denseN][];
for (int i = 0; i < denseN; ++i) {
final int fi = i;
fjts[i] = new RecursiveAction() {
@Override
protected void compute() {
int[] tmp = new int[sparseN];
double[] rowi = fchol._xx[fi];
int n = 0;
for (int k = 0; k < sparseN; ++k) if (rowi[k] != .0)
tmp[n++] = k;
nz[fi] = Arrays.copyOf(tmp, n);
}
};
}
ForkJoinTask.invokeAll(fjts);
for (int i = 0; i < denseN; ++i) {
final int fi = i;
fjts[i] = new RecursiveAction() {
@Override
protected void compute() {
double[] rowi = fchol._xx[fi];
int[] nzi = nz[fi];
for (int j = 0; j <= fi; ++j) {
double[] rowj = fchol._xx[j];
int[] nzj = nz[j];
double s = 0;
for (int t = 0, z = 0; t < nzi.length && z < nzj.length; ) {
int k1 = nzi[t];
int k2 = nzj[z];
if (k1 < k2) {
t++;
continue;
} else if (k1 > k2) {
z++;
continue;
} else {
s += rowi[k1] * rowj[k1];
t++;
z++;
}
}
rowi[j + sparseN] = _xx[fi][j + sparseN] - s;
}
}
};
}
ForkJoinTask.invokeAll(fjts);
// compute the cholesky of dense*dense-outer_product(diagonal*dense)
double[][] arr = new double[denseN][];
for (int i = 0; i < arr.length; ++i) arr[i] = Arrays.copyOfRange(fchol._xx[i], sparseN, sparseN + denseN);
int p = Runtime.getRuntime().availableProcessors();
InPlaceCholesky d = InPlaceCholesky.decompose_2(arr, 10, p);
fchol.setSPD(d.isSPD());
arr = d.getL();
for (int i = 0; i < arr.length; ++i) System.arraycopy(arr[i], 0, fchol._xx[i], sparseN, i + 1);
return chol;
}
use of jsr166y.RecursiveAction in project h2o-2 by h2oai.
the class DataAdapter method shrink.
public void shrink() {
if (_jobKey != null && !Job.isRunning(_jobKey))
throw new Job.JobCancelledException();
// for ( Col c: _c) c.shrink();
// sort columns in parallel: c.shrink() calls single-threaded Arrays.sort()
RecursiveAction[] ras = new RecursiveAction[_c.length];
int i = 0;
for (final Col c : _c) {
ras[i++] = new RecursiveAction() {
@Override
public void compute() {
c.shrink();
}
};
}
ForkJoinTask.invokeAll(ras);
}
use of jsr166y.RecursiveAction in project h2o-2 by h2oai.
the class Gram method cholesky.
/**
* Compute the cholesky decomposition.
*
* In case our gram starts with diagonal submatrix of dimension N, we exploit this fact to reduce the complexity of the problem.
* We use the standard decomposition of the cholesky factorization into submatrices.
*
* We split the Gram into 3 regions (4 but we only consider lower diagonal, sparse means diagonal region in this context):
* diagonal
* diagonal*dense
* dense*dense
* Then we can solve the cholesky in 3 steps:
* 1. We solve the diagnonal part right away (just do the sqrt of the elements).
* 2. The diagonal*dense part is simply divided by the sqrt of diagonal.
* 3. Compute Cholesky of dense*dense - outer product of cholesky of diagonal*dense computed in previous step
*
* @param chol
* @return
*/
public Cholesky cholesky(Cholesky chol, boolean parallelize, String id) {
long start = System.currentTimeMillis();
if (chol == null) {
double[][] xx = _xx.clone();
for (int i = 0; i < xx.length; ++i) xx[i] = xx[i].clone();
chol = new Cholesky(xx, _diag.clone());
}
final Cholesky fchol = chol;
final int sparseN = _diag.length;
final int denseN = _fullN - sparseN;
// compute the cholesky of the diagonal and diagonal*dense parts
if (_diag != null)
for (int i = 0; i < sparseN; ++i) {
double d = 1.0 / (chol._diag[i] = Math.sqrt(_diag[i]));
for (int j = 0; j < denseN; ++j) chol._xx[j][i] = d * _xx[j][i];
}
ForkJoinTask[] fjts = new ForkJoinTask[denseN];
// compute the outer product of diagonal*dense
//Log.info("SPARSEN = " + sparseN + " DENSEN = " + denseN);
final int[][] nz = new int[denseN][];
for (int i = 0; i < denseN; ++i) {
final int fi = i;
fjts[i] = new RecursiveAction() {
@Override
protected void compute() {
int[] tmp = new int[sparseN];
double[] rowi = fchol._xx[fi];
int n = 0;
for (int k = 0; k < sparseN; ++k) if (rowi[k] != .0)
tmp[n++] = k;
nz[fi] = Arrays.copyOf(tmp, n);
}
};
}
ForkJoinTask.invokeAll(fjts);
for (int i = 0; i < denseN; ++i) {
final int fi = i;
fjts[i] = new RecursiveAction() {
@Override
protected void compute() {
double[] rowi = fchol._xx[fi];
int[] nzi = nz[fi];
for (int j = 0; j <= fi; ++j) {
double[] rowj = fchol._xx[j];
int[] nzj = nz[j];
double s = 0;
for (int t = 0, z = 0; t < nzi.length && z < nzj.length; ) {
int k1 = nzi[t];
int k2 = nzj[z];
if (k1 < k2) {
t++;
continue;
} else if (k1 > k2) {
z++;
continue;
} else {
s += rowi[k1] * rowj[k1];
t++;
z++;
}
}
rowi[j + sparseN] = _xx[fi][j + sparseN] - s;
}
}
};
}
ForkJoinTask.invokeAll(fjts);
// compute the cholesky of dense*dense-outer_product(diagonal*dense)
// TODO we still use Jama, which requires (among other things) copy and expansion of the matrix. Do it here without copy and faster.
double[][] arr = new double[denseN][];
for (int i = 0; i < arr.length; ++i) arr[i] = Arrays.copyOfRange(fchol._xx[i], sparseN, sparseN + denseN);
// Log.info(id + ": CHOLESKY PRECOMPUTE TIME " + (System.currentTimeMillis() - start));
start = System.currentTimeMillis();
// parallelize cholesky
if (parallelize) {
int p = Runtime.getRuntime().availableProcessors();
InPlaceCholesky d = InPlaceCholesky.decompose_2(arr, 10, p);
fchol.setSPD(d.isSPD());
arr = d.getL();
// Log.info (id + ": H2O CHOLESKY DECOMP TAKES: " + (System.currentTimeMillis()-start));
} else {
// make it symmetric
for (int i = 0; i < arr.length; ++i) for (int j = 0; j < i; ++j) arr[j][i] = arr[i][j];
CholeskyDecomposition c = new Matrix(arr).chol();
fchol.setSPD(c.isSPD());
arr = c.getL().getArray();
//Log.info ("JAMA CHOLESKY DECOMPOSE TAKES: " + (System.currentTimeMillis()-start));
}
for (int i = 0; i < arr.length; ++i) System.arraycopy(arr[i], 0, fchol._xx[i], sparseN, i + 1);
return chol;
}
use of jsr166y.RecursiveAction in project h2o-3 by h2oai.
the class Gram method qrCholesky.
/**
* Compute Cholesky decompostion by computing partial QR decomposition (R == LU).
*
* The advantage of this method over the standard solve is that it can deal with Non-SPD matrices.
* Gram matrix comes out as Non-SPD if we have collinear columns.
* QR decomposition can identify collinear (redundant) columns and remove them from the dataset.
*
* QR computation:
* QR is computed using Gram-Schmidt elimination, using Gram matrix instead of the underlying dataset.
*
* Gram-schmidt decomposition can be computed as follows: (from "The Eelements of Statistical Learning")
* 1. set z0 = x0 = 1 (Intercept)
* 2. for j = 1:p
* for l = 1:j-1
* gamma_jl = dot(x_l,x_j)/dot(x_l,x_l)
* zj = xj - sum(gamma_j[l]*x_l)
* if(zj ~= 0) xj was redundant (collinear)
* Zjs are orthogonal projections of xk and form base of the X space. (dot(z_i,z_j) == 0 for i != j)
* In the end, gammas contain (Scaled) R from the QR decomp which is == LU from cholesky decomp.
*
*
* Note that all of these operations can be be computed from the Gram matrix only, as gram matrix contains
* dot(x_i,x_j) for i = 1..N, j = 1..N
*
* We can obviously compute gamma_lk directly, instead of replacing xk with zk, we fix the gram matrix.
* When doing that, we need to replace dot(xi,xk) with dot(xi,zk) for all i.
* There are two cases,
* 1) dot(xk,xk) -> dot(zk,zk)
* dot(xk - sum(gamma_l*x_l,xk - sum(gamma_l*x_l)
* = dot(xk,xk) - 2* sum(gamma_l*dot(x_i,x_k) + sum(gamma_l*sum(gamma_k*dot(x_l,x_k)))
* (can be simplified using the fact that dot(zi,zj) == 0 for i != j
* 2) dot(xi,xk) -> dot(xi,zk) for i != k
* dot(xi, xj - sum(gamma_l*x_l))
* = dot(xi, xj) - dot(xi,sum(gamma_l*x_l))
* = dot(xi,xj) - sum(gamma_l*dot(xi,x_l)) (linear combination
*
* The algorithm then goes as follows:
* for j = 1:n
* for l = 1:j-1
* compute gamma_jl
* update gram by replacing xk with zk = xk- sum(gamma_jl*s*xl);
*
* @param dropped_cols - empty list which will be filled with collinear columns removed during computation
* @return Cholesky - cholesky decomposition fo the gram
*/
public Cholesky qrCholesky(ArrayList<Integer> dropped_cols, boolean standardized) {
final double[][] Z = getXX(true, true);
final double[][] R = new double[Z.length][];
final double[] Zdiag = new double[Z.length];
final double[] ZdiagInv = new double[Z.length];
for (int i = 0; i < Z.length; ++i) ZdiagInv[i] = 1.0 / (Zdiag[i] = Z[i][i]);
for (int j = 0; j < Z.length; ++j) {
final double[] gamma = R[j] = new double[j + 1];
for (// compute gamma_l_j
int l = 0; // compute gamma_l_j
l <= j; // compute gamma_l_j
++l) gamma[l] = Z[j][l] * ZdiagInv[l];
double zjj = Z[j][j];
for (// only need the diagonal, the rest is 0 (dot product of orthogonal vectors)
int k = 0; // only need the diagonal, the rest is 0 (dot product of orthogonal vectors)
k < j; // only need the diagonal, the rest is 0 (dot product of orthogonal vectors)
++k) zjj += gamma[k] * (gamma[k] * Z[k][k] - 2 * Z[j][k]);
// Check R^2 for the current column and ignore if too high (1-R^2 too low), R^2 = 1- rs_res/rs_tot
// rs_res = zjj (the squared residual)
// rs_tot = sum((yi - mean(y))^2) = mean(y^2) - mean(y)^2,
// mean(y^2) is on diagonal
// mean(y) is in the intercept (0 if standardized)
// might not be regularized with number of observations, that's why dividing by intercept diagonal
double rs_tot = standardized ? ZdiagInv[j] : 1.0 / (Zdiag[j] - Z[j][0] * ZdiagInv[0] * Z[j][0]);
if (j > 0 && zjj * rs_tot < r2_eps) {
// collinear column, drop it!
zjj = 0;
dropped_cols.add(j - 1);
ZdiagInv[j] = 0;
} else
ZdiagInv[j] = 1. / zjj;
Z[j][j] = zjj;
int jchunk = Math.max(1, MIN_PAR / (Z.length - j));
int nchunks = (Z.length - j - 1) / jchunk;
nchunks = Math.min(nchunks, H2O.NUMCPUS);
if (nchunks <= 1) {
// single trheaded update
updateZ(gamma, Z, j);
} else {
// multi-threaded update
final int fjchunk = (Z.length - 1 - j) / nchunks;
int rem = Z.length - 1 - j - fjchunk * nchunks;
for (int i = Z.length - rem; i < Z.length; ++i) updateZij(i, j, Z, gamma);
RecursiveAction[] ras = new RecursiveAction[nchunks];
final int fj = j;
int k = 0;
for (int i = j + 1; i < Z.length - rem; i += fjchunk) {
// update xj to zj //
final int fi = i;
ras[k++] = new RecursiveAction() {
@Override
protected final void compute() {
int max_i = Math.min(fi + fjchunk, Z.length);
for (int i = fi; i < max_i; ++i) updateZij(i, fj, Z, gamma);
}
};
}
ForkJoinTask.invokeAll(ras);
}
}
// update the R - we computed Rt/sqrt(diag(Z)) which we can directly use to solve the problem
if (R.length < 500)
for (int i = 0; i < R.length; ++i) for (int j = 0; j <= i; ++j) R[i][j] *= Math.sqrt(Z[j][j]);
else {
RecursiveAction[] ras = new RecursiveAction[R.length];
for (int i = 0; i < ras.length; ++i) {
final int fi = i;
final double[] Rrow = R[i];
ras[i] = new RecursiveAction() {
@Override
protected void compute() {
for (int j = 0; j <= fi; ++j) Rrow[j] *= Math.sqrt(Z[j][j]);
}
};
}
ForkJoinTask.invokeAll(ras);
}
// drop the ignored cols
if (dropped_cols.isEmpty())
return new Cholesky(R, new double[0], true);
double[][] Rnew = new double[R.length - dropped_cols.size()][];
for (int i = 0; i < Rnew.length; ++i) Rnew[i] = new double[i + 1];
int j = 0;
for (int i = 0; i < R.length; ++i) {
if (Z[i][i] == 0)
continue;
int k = 0;
for (int l = 0; l <= i; ++l) {
if (k < dropped_cols.size() && l == (dropped_cols.get(k) + 1)) {
++k;
continue;
}
Rnew[j][l - k] = R[i][l];
}
++j;
}
return new Cholesky(Rnew, new double[0], true);
}
use of jsr166y.RecursiveAction in project h2o-3 by h2oai.
the class KVSpeedTest method testMillionInsertKeys.
@Test
@Ignore
public void testMillionInsertKeys() {
final int PAR = 100;
// PAR*NKEY = 100M keys
final int NKEY = 100000;
// PAR*WARMKEY = 1M keys
final int WARMKEY = 10000;
H2O.H2OCountedCompleter foo = H2O.submitTask(new H2O.H2OCountedCompleter() {
final Key[][] keys = new Key[PAR][NKEY];
final Value[][] vals = new Value[PAR][NKEY];
@Override
public void compute2() {
long now, start = System.currentTimeMillis();
ArrayList<RecursiveAction> rs = new ArrayList<>();
// Make a zillion keys
for (int i = 0; i < PAR; ++i) {
final int fi = i;
rs.add(new RecursiveAction() {
@Override
public void compute() {
// Make & insert Keys in parallel
for (int j = 0; j < NKEY; j++) keys[fi][j] = Key.make("Q" + (fi * NKEY + j));
}
});
}
ForkJoinTask.invokeAll(rs);
now = System.currentTimeMillis();
System.out.println("create msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
start = now;
// Warmup hashmap
for (int X = 0; X < 4; X++) {
for (int i = 0; i < PAR; ++i) {
final int fi = i;
rs.add(new RecursiveAction() {
@Override
public void compute() {
// Make & insert Keys in parallel
for (int j = 0; j < WARMKEY; j++) {
Key k = keys[fi][j];
H2O.putIfMatch(k, vals[fi][j] = new Value(k, ""), null);
}
}
});
}
ForkJoinTask.invokeAll(rs);
for (int i = 0; i < PAR; ++i) {
final int fi = i;
rs.set(fi, new RecursiveAction() {
@Override
public void compute() {
// Make & insert Keys in parallel
for (int j = 0; j < WARMKEY; j++) H2O.putIfMatch(keys[fi][j], null, vals[fi][j]);
}
});
}
ForkJoinTask.invokeAll(rs);
now = System.currentTimeMillis();
System.out.println("warmup msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / WARMKEY);
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
}
start = System.currentTimeMillis();
}
System.out.println("Starting insert work");
// Make a zillion Values in parallel
for (int i = 0; i < PAR; ++i) {
final int fi = i;
rs.add(new RecursiveAction() {
@Override
public void compute() {
// Make & insert Keys in parallel
for (int j = 0; j < NKEY; j++) vals[fi][j] = new Value(keys[fi][j], "");
}
});
}
ForkJoinTask.invokeAll(rs);
now = System.currentTimeMillis();
System.out.println("Values msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
start = now;
// Insert a zillion keys in parallel
for (int i = 0; i < PAR; ++i) {
final int fi = i;
rs.add(new RecursiveAction() {
@Override
public void compute() {
// Make & insert Keys in parallel
for (int j = 0; j < NKEY; j++) {
Key k = keys[fi][j];
H2O.putIfMatch(k, vals[fi][j], null);
}
}
});
}
ForkJoinTask.invokeAll(rs);
now = System.currentTimeMillis();
System.out.println("insert msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
start = now;
// Now remove them all
for (int i = 0; i < PAR; ++i) {
final int fi = i;
rs.set(fi, new RecursiveAction() {
@Override
public void compute() {
// Make & insert Keys in parallel
for (int j = 0; j < NKEY; j++) H2O.putIfMatch(keys[fi][j], null, vals[fi][j]);
}
});
}
ForkJoinTask.invokeAll(rs);
now = System.currentTimeMillis();
System.out.println("remove msec=" + (now - start) + ", msec/op=" + ((double) (now - start)) / PAR / NKEY);
start = now;
tryComplete();
}
});
foo.join();
System.out.printf("STORE size = %d\n", H2O.STORE.size());
System.out.printf("STORE raw array length = %d\n", H2O.STORE.raw_array().length);
}
Aggregations