Search in sources :

Example 46 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testCoordinateDescent_anomaly_CovUpdates.

// Smoke test: fits a standardized Gaussian GLM with the COORDINATE_DESCENT solver on the
// ECG anomaly training file, checks that the builder reports itself stopped once training
// has returned, and prints the training metrics.
@Test
public void testCoordinateDescent_anomaly_CovUpdates() {
    Key frameKey = Key.make("anomaly_parsed");
    Key<GLMModel> modelKey = Key.make("anomaly_model");
    Frame trainFrame = parse_test_file(frameKey, "smalldata/anomaly/ecg_discord_train.csv");
    GLMModel fitted = null;
    try {
        GLMParameters glmParams = new GLMParameters();
        glmParams._train = trainFrame._key;
        glmParams._response_column = "C1";
        glmParams._family = Family.gaussian;
        glmParams._solver = Solver.COORDINATE_DESCENT;
        glmParams._standardize = true;

        GLM builder = new GLM(glmParams, modelKey);
        fitted = builder.trainModel().get();

        assertTrue(builder.isStopped());
        System.out.println(fitted._output._training_metrics);
    } finally {
        // Always release the parsed frame; the model only exists if training succeeded.
        trainFrame.delete();
        if (fitted != null) {
            fitted.delete();
        }
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters)

Example 47 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testCitibikeReproPUBDEV1839.

// Reproduction case for PUBDEV-1839: trains a Poisson GLM on the repro training file with
// the repro test file as validation data, then hands the model and the validation frame
// to testScoring.
@Test
public void testCitibikeReproPUBDEV1839() throws Exception {
    Frame trainFrame = parse_test_file("smalldata/jira/pubdev_1839_repro_train.csv");
    Frame validFrame = parse_test_file("smalldata/jira/pubdev_1839_repro_test.csv");
    GLMModel fitted = null;
    try {
        Scope.enter();

        GLMParameters poissonParams = new GLMParameters(Family.poisson);
        poissonParams._train = trainFrame._key;
        poissonParams._valid = validFrame._key;
        poissonParams._response_column = "bikes";

        fitted = new GLM(poissonParams).trainModel().get();
        testScoring(fitted, validFrame);
    } finally {
        // Both frames were parsed before the scope was entered, so remove them explicitly.
        trainFrame.remove();
        validFrame.remove();
        if (fitted != null) {
            fitted.delete();
        }
        Scope.exit();
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters)

Example 48 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testProximal.

// Trains a binomial GLM on the prostate data with a beta_constraints frame carrying
// "beta_given" and "rho" columns, retrains it with the L_BFGS solver, and then verifies
// for the coefficients of the FIRST fit that
//     gradient[i] + rho[i] * (beta[i] - beta_given[i])
// is zero (within 1e-4) for every coefficient.
@Test
public void testProximal() {
    //    glmnet's result:
    //    res2 <- glmnet(x=M,y=D$CAPSULE,lower.limits=-.5,upper.limits=.5,family='binomial')
    //    res2$beta[,58]
    //    AGE        RACE          DPROS       PSA         VOL         GLEASON
    //    -0.00616326 -0.50000000  0.50000000  0.03628192 -0.01249324  0.50000000
    //    res2$a0[100]
    //    res2$a0[58]
    //    s57
    //    -4.155864
    //    lambda = 0.001108, null dev =  512.2888, res dev = 379.7597
    Key frameKey = Key.make("prostate_parsed");
    Key<GLMModel> modelKey = Key.make("prostate_model");
    GLMModel fitted = null;
    Frame prostate = parse_test_file(frameKey, "smalldata/logreg/prostate.csv");
    // Drop the ID column, then store the modified frame back in DKV under its own key.
    prostate.remove("ID").remove();
    DKV.put(prostate._key, prostate);

    // Build the constraints frame by parsing an in-memory CSV.
    Key betaConsKey = Key.make("beta_constraints");
    FVecTest.makeByteVec(betaConsKey, "names, beta_given, rho\n AGE, 0.1, 1\n RACE, -0.1, 1 \n DPROS, 10, 1 \n DCAPS, -10, 1 \n PSA, 0, 1\n VOL, 0, 1\nGLEASON, 0, 1\n Intercept, 0, 0 \n");
    Frame betaConstraints = ParseDataset.parse(Key.make("beta_constraints.hex"), betaConsKey);
    try {
        // H2O differs on intercept and race, same residual deviance though
        GLMParameters glmParams = new GLMParameters();
        glmParams._standardize = false;
        glmParams._family = Family.binomial;
        glmParams._beta_constraints = betaConstraints._key;
        glmParams._response_column = "CAPSULE";
        // NOTE(review): ID was already removed from the frame above, so this entry looks
        // redundant - confirm before dropping it.
        glmParams._ignored_columns = new String[] { "ID" };
        glmParams._train = prostate._key;
        glmParams._alpha = new double[] { 0 };
        glmParams._lambda = new double[] { 0 };
        glmParams._obj_reg = 1.0 / 380;
        glmParams._objective_epsilon = 0;

        // First fit: solver left at its default; these coefficients are the ones checked below.
        GLM builder = new GLM(glmParams, modelKey);
        fitted = builder.trainModel().get();
        double[] firstBeta = fitted.beta();

        // Second fit: same parameters and model key, but with L_BFGS and a larger iteration cap.
        glmParams._solver = Solver.L_BFGS;
        glmParams._max_iterations = 1000;
        builder = new GLM(glmParams, modelKey);
        fitted = builder.trainModel().get();

        // Move the response column to the end of the frame before building the DataInfo.
        prostate.add("CAPSULE", prostate.remove("CAPSULE"));

        // now check the ginfo
        DataInfo dinfo = new DataInfo(prostate, null, 1, true, TransformType.NONE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
        GLMGradientTask gradTask = new GLMBinomialGradientTask(null, dinfo, glmParams, 0, firstBeta).doAll(dinfo._adaptedFrame);
        double[] grad = gradTask._gradient;
        for (int i = 0; i < firstBeta.length; ++i) {
            double gradTerm = grad[i];
            double rho = betaConstraints.vec("rho").at(i);
            double betaGiven = betaConstraints.vec("beta_given").at(i);
            assertEquals(0, gradTerm + rho * (firstBeta[i] - betaGiven), 1e-4);
        }
    } finally {
        betaConstraints.delete();
        prostate.delete();
        if (fitted != null) {
            fitted.delete();
        }
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters)

Example 49 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class L_BFGS_Test method logistic.

// Runs L_BFGS directly on a binomial GLM gradient solver built over the prostate data and
// checks that 2 * objective * numRows ends up within 0.1 of 378.34.
@Test
public void logistic() {
    Key parsedKey = Key.make("prostate");
    DataInfo dinfo = null;
    try {
        GLMParameters glmp = new GLMParameters(Family.binomial, Family.binomial.defaultLink);
        glmp._alpha = new double[] { 0 };
        glmp._lambda = new double[] { 1e-5 };
        glmp._obj_reg = 1 / 380.0;

        Frame source = parse_test_file(parsedKey, "smalldata/glm_test/prostate_cat_replaced.csv");
        // Move the response to the last column and drop the ID column.
        source.add("CAPSULE", source.remove("CAPSULE"));
        source.remove("ID").remove();
        Frame valid = new Frame(source._names.clone(), source.vecs().clone());

        dinfo = new DataInfo(source, valid, 1, false,
                DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE,
                true, false, false,
                /* weights */ false,
                /* offset */ false,
                /* fold */ false);
        DKV.put(dinfo._key, dinfo);

        GLMGradientSolver solver = new GLMGradientSolver(null, glmp, dinfo, 1e-5, null);
        L_BFGS lbfgs = new L_BFGS().setGradEps(1e-8);

        // Start from all-zero coefficients except the last slot, which gets link(mean(CAPSULE)).
        double[] startCoefs = MemoryManager.malloc8d(dinfo.fullN() + 1);
        startCoefs[startCoefs.length - 1] = new GLMWeightsFun(glmp).link(source.vec("CAPSULE").mean());

        // Prints a running call count, the objective value and a gradient norm; never aborts.
        L_BFGS.ProgressMonitor monitor = new L_BFGS.ProgressMonitor() {

            int _calls = 0;

            public boolean progress(double[] beta, GradientInfo ginfo) {
                _calls += 1;
                System.out.println(_calls + ":" + ginfo._objVal + ", " + ArrayUtils.l2norm2(ginfo._gradient, false));
                return true;
            }
        };
        L_BFGS.Result result = lbfgs.solve(solver, startCoefs, solver.getGradient(startCoefs), monitor);

        double scaledObjective = 2 * result.ginfo._objVal * source.numRows();
        assertEquals(378.34, scaledObjective, 1e-1);
    } finally {
        if (dinfo != null) {
            DKV.remove(dinfo._key);
        }
        // The frame is looked up by key because it was parsed inside the try block.
        Value parsedValue = DKV.get(parsedKey);
        if (parsedValue != null) {
            parsedValue.<Frame>get().delete();
        }
    }
}
Also used : DataInfo(hex.DataInfo) Frame(water.fvec.Frame) GLMGradientSolver(hex.glm.GLM.GLMGradientSolver) GradientInfo(hex.optimization.OptimizationUtils.GradientInfo) GLMWeightsFun(hex.glm.GLMModel.GLMWeightsFun) GLMParameters(hex.glm.GLMModel.GLMParameters) Test(org.junit.Test)

Example 50 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class L_BFGS_Test method testArcene.

// Test LSM on arcene - wide dataset with ~10k columns.
// Also tests warm start and the max #iterations cap:
//   r1 - run from the initial coefficients, capped at 20 iterations
//   r2 - warm start from r1's coefficients and gradient info, capped at 50 iterations
//   r3 - run from the same initial coefficients with a fresh solver, capped at 100 iterations
// The warm-started result (r2) must reach the same objective value as r3.
@Test
public void testArcene() {
    Key parsedKey = Key.make("arcene_parsed");
    DataInfo dinfo = null;
    try {
        Frame source = parse_test_file(parsedKey, "smalldata/glm_test/arcene.csv");
        Frame valid = new Frame(source._names.clone(), source.vecs().clone());
        GLMParameters glmp = new GLMParameters(Family.gaussian);
        glmp._lambda = new double[] { 1e-5 };
        glmp._alpha = new double[] { 0 };
        glmp._obj_reg = 0.01;
        dinfo = new DataInfo(source, valid, 1, false,
                DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE,
                true, false, false,
                /* weights */ false,
                /* offset */ false,
                /* fold */ false);
        DKV.put(dinfo._key, dinfo);
        GradientSolver solver = new GLMGradientSolver(null, glmp, dinfo, 1e-5, null);
        L_BFGS lbfgs = new L_BFGS().setMaxIter(20);
        // All-zero start except the last slot, which gets link(mean(last column)).
        double[] beta = MemoryManager.malloc8d(dinfo.fullN() + 1);
        beta[beta.length - 1] = new GLMWeightsFun(glmp).link(source.lastVec().mean());
        // Each solve gets a clone so that every run starts from the same untouched array.
        L_BFGS.Result r1 = lbfgs.solve(solver, beta.clone(), solver.getGradient(beta), new L_BFGS.ProgressMonitor() {

            int _i = 0;

            public boolean progress(double[] beta, GradientInfo ginfo) {
                System.out.println(++_i + ":" + ginfo._objVal);
                return true;
            }
        });
        // Warm start: raise the cap and continue from where r1 stopped.
        lbfgs.setMaxIter(50);
        final int iter = r1.iter;
        L_BFGS.Result r2 = lbfgs.solve(solver, r1.coefs, r1.ginfo, new L_BFGS.ProgressMonitor() {

            int _i = 0;

            public boolean progress(double[] beta, GradientInfo ginfo) {
                System.out.println(iter + " + " + ++_i + ":" + ginfo._objVal);
                return true;
            }
        });
        System.out.println();
        // Reference run with a fresh solver instance.
        lbfgs = new L_BFGS().setMaxIter(100);
        L_BFGS.Result r3 = lbfgs.solve(solver, beta.clone(), solver.getGradient(beta), new L_BFGS.ProgressMonitor() {

            int _i = 0;

            public boolean progress(double[] beta, GradientInfo ginfo) {
                System.out.println(++_i + ":" + ginfo._objVal + ", " + ArrayUtils.l2norm2(ginfo._gradient, false));
                return true;
            }
        });
        // JUnit's assertEquals takes (expected, actual[, delta]); keeping that order makes
        // failure messages report the right value as "expected".
        assertEquals(20, r1.iter);
        // NOTE: r1.iter + r2.iter == r3.iter is intentionally not asserted; the sums were
        // observed to mismatch by 2.
        assertEquals(r2.ginfo._objVal, r3.ginfo._objVal, 1e-8);
        assertEquals(1e-4, .5 * glmp._lambda[0] * ArrayUtils.l2norm(r3.coefs, true) + r3.ginfo._objVal, 5e-4);
        assertTrue("iter# expected < 100, got " + r3.iter, r3.iter < 100);
    } finally {
        if (dinfo != null) {
            DKV.remove(dinfo._key);
        }
        // The frame is looked up by key because it was parsed inside the try block.
        Value v = DKV.get(parsedKey);
        if (v != null) {
            v.<Frame>get().delete();
        }
    }
}
Also used : DataInfo(hex.DataInfo) Frame(water.fvec.Frame) GLMGradientSolver(hex.glm.GLM.GLMGradientSolver) GradientSolver(hex.optimization.OptimizationUtils.GradientSolver) GLMGradientSolver(hex.glm.GLM.GLMGradientSolver) GradientInfo(hex.optimization.OptimizationUtils.GradientInfo) GLMWeightsFun(hex.glm.GLMModel.GLMWeightsFun) GLMParameters(hex.glm.GLMModel.GLMParameters) Test(org.junit.Test)

Aggregations

GLMParameters (hex.glm.GLMModel.GLMParameters)50 Test (org.junit.Test)23 Solver (hex.glm.GLMModel.GLMParameters.Solver)16 ModelMetricsBinomialGLM (hex.ModelMetricsBinomialGLM)13 BufferedString (water.parser.BufferedString)10 ModelMetricsRegressionGLM (hex.ModelMetricsRegressionGLM)8 Frame (water.fvec.Frame)7 H2OModelBuilderIllegalArgumentException (water.exceptions.H2OModelBuilderIllegalArgumentException)6 ModelMetricsMultinomialGLM (hex.ModelMetricsBinomialGLM.ModelMetricsMultinomialGLM)4 GLMWeightsFun (hex.glm.GLMModel.GLMWeightsFun)4 HashMap (java.util.HashMap)4 NFSFileVec (water.fvec.NFSFileVec)3 hex (hex)2 DataInfo (hex.DataInfo)2 GLMGradientSolver (hex.glm.GLM.GLMGradientSolver)2 GradientInfo (hex.optimization.OptimizationUtils.GradientInfo)2 GLMTask (hex.glm.GLMTask)1 GradientSolver (hex.optimization.OptimizationUtils.GradientSolver)1 H2OCountedCompleter (water.H2O.H2OCountedCompleter)1