Search in sources :

Example 11 with DataInfo

use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.

In class GLMTest2, method testProximal:

/**
 * Fits an unregularized binomial GLM on the rebalanced prostate data with
 * proximal beta constraints (columns {@code names}, {@code beta_given},
 * {@code rho}) and asserts that, at the returned solution,
 * {@code gradient[i] + rho[i] * (beta[i] - beta_given[i])} is zero within 1e-8.
 * The check is run once on the raw predictors and once with standardization.
 *
 * <p>All frames created here (the data frame, the parsed constraints frame)
 * and the model are deleted in the {@code finally} block.
 */
@Test
public void testProximal() {
    // Reference values recorded from glmnet:
    //    res2 <- glmnet(x=M,y=D$CAPSULE,lower.limits=-.5,upper.limits=.5,family='binomial')
    //    res2$beta[,58]
    //    AGE        RACE          DPROS       PSA         VOL         GLEASON
    //    -0.00616326 -0.50000000  0.50000000  0.03628192 -0.01249324  0.50000000
    //    res2$a0[58]
    //    s57
    //    -4.155864
    //    lambda = 0.001108, null dev =  512.2888, res dev = 379.7597
    Key parsed = Key.make("prostate_parsed");
    Key modelKey = Key.make("prostate_model");
    GLMModel model = null;
    Frame fr = getFrameForFile(parsed, "smalldata/logreg/prostate.csv", new String[] { "ID" }, "CAPSULE");
    // Rebalance into 64 chunks, then continue with the rebalanced copy only.
    Key k = Key.make("rebalanced");
    H2O.submitTask(new RebalanceDataSet(fr, k, 64)).join();
    fr.delete();
    fr = DKV.get(k).get();
    fr.remove("ID");
    Key betaConsKey = Key.make("beta_constraints");
    // Constraint rows follow the coefficient order:
    // [AGE, RACE, DPROS, DCAPS, PSA, VOL, GLEASON, Intercept]
    Frame betaConstraints = null;
    try {
        // Parsed inside the try so that the data frame is still cleaned up if parsing fails.
        FVecTest.makeByteVec(betaConsKey, "names, beta_given, rho\n AGE, 0.1, 1\nRACE, -0.1, 1 \n DPROS, 10, 1 \n DCAPS, -10, 1 \n PSA, 0, 1\n VOL, 0, 1\nGLEASON, 0, 1\n Intercept, 0, 0 \n");
        betaConstraints = ParseDataset2.parse(parsed, new Key[] { betaConsKey });

        // --- non-standardized fit ---
        // H2O differs on intercept and race, same residual deviance though
        GLM2.Source src = new GLM2.Source((Frame) fr.clone(), fr.vec("CAPSULE"), false, true);
        new GLM2("GLM offset test on prostate.", Key.make(), modelKey, src, Family.binomial)
            .setNonNegative(false)
            .setRegularization(new double[] { 0 }, new double[] { 0.000 })
            .setBetaConstraints(betaConstraints)
            .setHighAccuracy()
            .doInit()
            .fork()
            .get();
        model = DKV.get(modelKey).get();
        // Remove and re-add CAPSULE; presumably this moves the response to the
        // last column, where DataInfo expects it -- TODO confirm.
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        DataInfo dinfo = new DataInfo(fr, 1, true, false, TransformType.NONE, DataInfo.TransformType.NONE);
        // NOTE(review): 1.0 / 380 looks like 1 / number-of-rows of the prostate data -- confirm.
        GLMIterationTask glmt = new GLMTask.GLMIterationTask(0, null, dinfo, new GLMParams(Family.binomial), false, true, true, model.beta(), 0, 1.0 / 380, ModelUtils.DEFAULT_THRESHOLDS, null).doAll(dinfo._adaptedFrame);
        double[] beta = model.beta();
        double[] grad = glmt.gradient(0, 0);
        for (int i = 0; i < beta.length; ++i) {
            Assert.assertEquals(0, grad[i] + betaConstraints.vec("rho").at(i) * (beta[i] - betaConstraints.vec("beta_given").at(i)), 1e-8);
        }

        // --- standardized fit ---
        src = new GLM2.Source((Frame) fr.clone(), fr.vec("CAPSULE"), true, true);
        new GLM2("GLM offset test on prostate.", Key.make(), modelKey, src, Family.binomial)
            .setNonNegative(false)
            .setRegularization(new double[] { 0 }, new double[] { 0.000 })
            .setBetaConstraints(betaConstraints)
            .setHighAccuracy()
            .doInit()
            .fork()
            .get();
        model = DKV.get(modelKey).get();
        fr.add("CAPSULE", fr.remove("CAPSULE"));
        dinfo = new DataInfo(fr, 1, true, false, TransformType.STANDARDIZE, DataInfo.TransformType.NONE);
        glmt = new GLMTask.GLMIterationTask(0, null, dinfo, new GLMParams(Family.binomial), false, true, true, model.norm_beta(0), 0, 1.0 / 380, ModelUtils.DEFAULT_THRESHOLDS, null).doAll(dinfo._adaptedFrame);
        double[] beta2 = model.norm_beta(0);
        double[] grad2 = glmt.gradient(0, 0);
        // Bound the loop by the array actually indexed (beta2); the intercept
        // (last entry) is checked separately below. beta_given is multiplied by
        // the column's sigma to compare against the normalized coefficient.
        for (int i = 0; i < beta2.length - 1; ++i) {
            Assert.assertEquals("grad[" + i + "] != 0", 0, grad2[i] + betaConstraints.vec("rho").at(i) * (beta2[i] - betaConstraints.vec("beta_given").at(i) * dinfo._adaptedFrame.vec(i).sigma()), 1e-8);
        }
        Assert.assertEquals("grad[intercept] != 0", 0, grad2[grad2.length - 1], 1e-8);
    } finally {
        fr.delete();
        if (model != null) {
            model.delete();
        }
        // The parsed constraints frame was previously never removed.
        if (betaConstraints != null) {
            betaConstraints.delete();
        }
    }
}
Also used: Source (hex.glm.GLM2.Source), DataInfo (hex.FrameTask.DataInfo), GLMIterationTask (hex.glm.GLMTask.GLMIterationTask), Test (org.junit.Test)

Example 12 with DataInfo

use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.

In class MatrixTest, method testMultiplication:

/**
 * Simple small and dense case: computes t(X) %*% X with
 * {@code DMatrix.transpose} / {@code DMatrix.mmul} and compares it, entry by
 * entry (tolerance 1e-4), against the gram matrix produced by {@code GramTask}
 * after multiplying that gram by the task's {@code _nobs}.
 * Finishes by removing everything it created and calling
 * {@code checkLeakedKeys()}.
 */
@Test
public void testMultiplication() {
    Key parsed = Key.make("prostate_parsed");
    Futures pending = new Futures();

    // Load the data, drop the RACE column, and rebalance into 64 chunks.
    Frame raw = getFrameForFile(parsed, "smalldata/glm_test/prostate_cat_replaced.csv");
    raw.remove("RACE").remove(pending);
    Key rebalancedKey = Key.make("rebalanced");
    H2O.submitTask(new RebalanceDataSet(raw, rebalancedKey, 64)).join();
    raw.delete();
    Frame data = DKV.get(rebalancedKey).get();

    // Distributed product of the transposed frame with the frame itself.
    Frame transposed = DMatrix.transpose(data);
    transposed.reloadVecs();
    Frame product = DMatrix.mmul(transposed, data);

    // Gram as computed by the GLM machinery, scaled by the observation count.
    DataInfo info = new DataInfo(data, 0, false, false, DataInfo.TransformType.NONE);
    GramTask gramTask = new GramTask(null, info, false, false).doAll(info._adaptedFrame);
    gramTask._gram.mul(gramTask._nobs);
    double[][] expected = gramTask._gram.getDenseXX();

    for (int i = 0; i < expected.length; ++i) {
        double[] expectedRow = expected[i];
        for (int j = 0; j < expectedRow.length; ++j) {
            assertEquals("position " + i + ", " + j, expectedRow[j], product.vec(j).at(i), 1e-4);
        }
    }

    // Clean up: the data frame first, then the vecs of both derived frames.
    data.delete();
    for (Vec column : transposed.vecs()) {
        column.remove(pending);
    }
    for (Vec column : product.vecs()) {
        column.remove(pending);
    }
    pending.blockForPending();
    checkLeakedKeys();
}
Also used : DataInfo(hex.FrameTask.DataInfo) Frame(water.fvec.Frame) RebalanceDataSet(water.fvec.RebalanceDataSet) NFSFileVec(water.fvec.NFSFileVec) GramTask(hex.gram.Gram.GramTask) Test(org.junit.Test)

Aggregations

DataInfo (hex.FrameTask.DataInfo): 12, Frame (water.fvec.Frame): 5, Vec (water.fvec.Vec): 5, GramTask (hex.gram.Gram.GramTask): 3, Test (org.junit.Test): 3, RString (water.util.RString): 2, TransformType (hex.FrameTask.DataInfo.TransformType): 1, Source (hex.glm.GLM2.Source): 1, GLMIterationTask (hex.glm.GLMTask.GLMIterationTask): 1, File (java.io.File): 1, Field (java.lang.reflect.Field): 1, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1, Futures (water.Futures): 1, NFSFileVec (water.fvec.NFSFileVec): 1, RebalanceDataSet (water.fvec.RebalanceDataSet): 1, MRUtils.sampleFrame (water.util.MRUtils.sampleFrame): 1