Use of hex.FrameTask.DataInfo in the h2o-2 project by h2oai.
Class GLMTest2, method testProximal.
// Checks that GLM2 honours a proximal penalty passed through the beta-constraints frame
// (columns: names, beta_given, rho).
//
// After each fit the gradient is recomputed with a GLMIterationTask at the fitted
// coefficients and, per coefficient, grad[i] + rho[i] * (beta[i] - beta_given[i]) must be
// zero within 1e-8. This is done twice: once on the raw predictors and once on
// standardized predictors, where beta_given is multiplied by the column's sigma and the
// intercept gradient is asserted separately.
@Test
public void testProximal() {
    // NOTE(review): the glmnet reference below is never asserted against in this test, and
    // the call shown uses lower/upper limits rather than a proximal penalty -- presumably
    // carried over from a sibling test; confirm before relying on it.
    // glmnet's result:
    // res2 <- glmnet(x=M,y=D$CAPSULE,lower.limits=-.5,upper.limits=.5,family='binomial')
    // res2$beta[,58]
    // AGE RACE DPROS PSA VOL GLEASON
    // -0.00616326 -0.50000000 0.50000000 0.03628192 -0.01249324 0.50000000 // res2$a0[100]
    // res2$a0[58]
    // s57
    // -4.155864
    // lambda = 0.001108, null dev = 512.2888, res dev = 379.7597
    Key parsed = Key.make("prostate_parsed");
    Key modelKey = Key.make("prostate_model");
    GLMModel model = null;
    Frame fr = getFrameForFile(parsed, "smalldata/logreg/prostate.csv", new String[] { "ID" }, "CAPSULE");

    // Rebalance the data into a new frame and drop the original.
    Key k = Key.make("rebalanced");
    H2O.submitTask(new RebalanceDataSet(fr, k, 64)).join();
    fr.delete();
    fr = DKV.get(k).get();
    fr.remove("ID");

    // Build the constraints frame from an in-memory CSV: one row per coefficient.
    // Row order: [AGE, RACE, DPROS, DCAPS, PSA, VOL, GLEASON, Intercept]
    // The intercept row has rho = 0, i.e. no proximal term is applied to it.
    Key betaConsKey = Key.make("beta_constraints");
    FVecTest.makeByteVec(betaConsKey, "names, beta_given, rho\n AGE, 0.1, 1\nRACE, -0.1, 1 \n DPROS, 10, 1 \n DCAPS, -10, 1 \n PSA, 0, 1\n VOL, 0, 1\nGLEASON, 0, 1\n Intercept, 0, 0 \n");
    Frame betaConstraints = ParseDataset2.parse(parsed, new Key[] { betaConsKey });
    try {
        // --- Pass 1: no standardization ---
        GLM2.Source src = new GLM2.Source((Frame) fr.clone(), fr.vec("CAPSULE"), false, true);
        new GLM2("GLM offset test on prostate.", Key.make(), modelKey, src, Family.binomial)
            .setNonNegative(false)
            .setRegularization(new double[] { 0 }, new double[] { 0.000 })
            .setBetaConstraints(betaConstraints)
            .setHighAccuracy()
            .doInit()
            .fork()
            .get();
        model = DKV.get(modelKey).get();

        // Remove and re-add the response column before building the DataInfo
        // (presumably so CAPSULE ends up as the last column -- confirm).
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        DataInfo dinfo = new DataInfo(fr, 1, true, false, TransformType.NONE, DataInfo.TransformType.NONE);
        // NOTE(review): 1.0 / 380 is presumably 1/nobs for the prostate data -- confirm.
        GLMIterationTask glmt = new GLMTask.GLMIterationTask(0, null, dinfo, new GLMParams(Family.binomial), false, true, true, model.beta(), 0, 1.0 / 380, ModelUtils.DEFAULT_THRESHOLDS, null).doAll(dinfo._adaptedFrame);
        double[] beta = model.beta();
        double[] grad = glmt.gradient(0, 0);
        for (int i = 0; i < beta.length; ++i) {
            Assert.assertEquals(0, grad[i] + betaConstraints.vec("rho").at(i) * (beta[i] - betaConstraints.vec("beta_given").at(i)), 1e-8);
        }

        // --- Pass 2: standardized predictors ---
        src = new GLM2.Source((Frame) fr.clone(), fr.vec("CAPSULE"), true, true);
        new GLM2("GLM offset test on prostate.", Key.make(), modelKey, src, Family.binomial)
            .setNonNegative(false)
            .setRegularization(new double[] { 0 }, new double[] { 0.000 })
            .setBetaConstraints(betaConstraints)
            .setHighAccuracy()
            .doInit()
            .fork()
            .get();
        model = DKV.get(modelKey).get();
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        dinfo = new DataInfo(fr, 1, true, false, TransformType.STANDARDIZE, DataInfo.TransformType.NONE);
        glmt = new GLMTask.GLMIterationTask(0, null, dinfo, new GLMParams(Family.binomial), false, true, true, model.norm_beta(0), 0, 1.0 / 380, ModelUtils.DEFAULT_THRESHOLDS, null).doAll(dinfo._adaptedFrame);
        double[] beta2 = model.norm_beta(0);
        double[] grad2 = glmt.gradient(0, 0);
        // All coefficients except the last (intercept); beta_given is multiplied by the
        // column's sigma to compare against the normalized coefficients.
        for (int i = 0; i < beta2.length - 1; ++i) {
            Assert.assertEquals("grad[" + i + "] != 0", 0, grad2[i] + betaConstraints.vec("rho").at(i) * (beta2[i] - betaConstraints.vec("beta_given").at(i) * dinfo._adaptedFrame.vec(i).sigma()), 1e-8);
        }
        Assert.assertEquals("grad[intercept] != 0", 0, grad2[grad2.length - 1], 1e-8);
    } finally {
        fr.delete();
        // The constraints frame was created by this test as well; delete it so it does
        // not outlive the test.
        betaConstraints.delete();
        if (model != null) {
            model.delete();
        }
    }
}
Use of hex.FrameTask.DataInfo in the h2o-2 project by h2oai.
Class MatrixTest, method testMultiplication.
// Small, dense sanity check: the product t(X) %*% X computed through DMatrix must agree
// (within 1e-4) with the gram matrix produced by GramTask on the same frame.
@Test
public void testMultiplication() {
    final Key srcKey = Key.make("prostate_parsed");
    final Futures pending = new Futures();

    // Load the data, drop RACE, then rebalance into a fresh frame and discard the original.
    Frame data = getFrameForFile(srcKey, "smalldata/glm_test/prostate_cat_replaced.csv");
    data.remove("RACE").remove(pending);
    final Key rebalancedKey = Key.make("rebalanced");
    H2O.submitTask(new RebalanceDataSet(data, rebalancedKey, 64)).join();
    data.delete();
    data = DKV.get(rebalancedKey).get();

    // Actual value: transpose times original.
    Frame transposed = DMatrix.transpose(data);
    transposed.reloadVecs();
    Frame product = DMatrix.mmul(transposed, data);

    // Expected value: gram from the GLM gram task, multiplied by the task's _nobs
    // (presumably undoing a per-row averaging inside the task -- confirm).
    DataInfo info = new DataInfo(data, 0, false, false, DataInfo.TransformType.NONE);
    GramTask gramTask = new GramTask(null, info, false, false).doAll(info._adaptedFrame);
    gramTask._gram.mul(gramTask._nobs);
    double[][] expected = gramTask._gram.getDenseXX();

    for (int row = 0; row < expected.length; ++row) {
        for (int col = 0; col < expected[row].length; ++col) {
            assertEquals("position " + row + ", " + col, expected[row][col], product.vec(col).at(row), 1e-4);
        }
    }

    // Release everything this test created, then verify nothing was left behind.
    data.delete();
    for (Vec column : transposed.vecs()) {
        column.remove(pending);
    }
    for (Vec column : product.vecs()) {
        column.remove(pending);
    }
    pending.blockForPending();
    checkLeakedKeys();
}
Aggregations