Search in sources :

Example 1 with ModelMetricsBinomialGLM

use of hex.ModelMetricsBinomialGLM in project h2o-3 by h2oai.

the class GLMBasicTestBinomial method testNoIntercept.

/**
 * Fits a binomial GLM without an intercept on the prostate training frame, once per solver,
 * and compares coefficients, deviances, degrees of freedom and AIC with the R reference fit
 * quoted in the method body. Afterwards the model is re-scored on the training and validation
 * frames and the freshly computed metrics are compared with the ones stored on the model.
 */
@Test
public void testNoIntercept() {
    //    Call:  glm(formula = CAPSULE ~ . - 1 - RACE - DCAPS, family = binomial,
    //      data = train)
    //
    //    Coefficients:
    //    AGE        DPROSa    DPROSb    DPROSc    DPROSd       PSA       VOL   GLEASON
    //    -0.00743  -6.46499  -5.60120  -5.18213  -5.70027   0.02753  -0.01235   0.86122
    //
    //    Degrees of Freedom: 290 Total (i.e. Null);  282 Residual
    //    Null Deviance:	    402
    //    Residual Deviance: 302.9 	AIC: 318.9
    String[] cfs1 = new String[] { "AGE", "DPROS.a", "DPROS.b", "DPROS.c", "DPROS.d", "PSA", "VOL", "GLEASON" };
    double[] vals = new double[] { -0.00743, -6.46499, -5.60120, -5.18213, -5.70027, 0.02753, -0.01235, 0.86122 };
    GLMParameters params = new GLMParameters(Family.binomial);
    params._response_column = "CAPSULE";
    params._ignored_columns = new String[] { "ID", "RACE", "DCAPS" };
    params._train = _prostateTrain._key;
    params._valid = _prostateTest._key;
    params._lambda = new double[] { 0 };
    params._alpha = new double[] { 0 };
    params._standardize = false;
    params._intercept = false;
    params._objective_epsilon = 0;
    params._gradient_epsilon = 1e-6;
    params._missing_values_handling = MissingValuesHandling.Skip;
    // not expected to reach max iterations here
    params._max_iterations = 100;
    for (Solver s : new Solver[] { Solver.AUTO, Solver.IRLSM, Solver.L_BFGS, Solver.COORDINATE_DESCENT }) {
        // Declared per solver so that a failed training run never re-deletes the model
        // left over from the previous iteration.
        GLMModel model = null;
        Frame scoreTrain = null, scoreTest = null;
        try {
            params._solver = s;
            System.out.println("SOLVER = " + s);
            model = new GLM(params).trainModel().get();
            HashMap<String, Double> coefs = model.coefficients();
            System.out.println("coefs = " + coefs.toString());
            System.out.println("metrics = " + model._output._training_metrics);
            // Coordinate descent variants are compared with a looser coefficient tolerance.
            boolean CD = (s == Solver.COORDINATE_DESCENT || s == Solver.COORDINATE_DESCENT_NAIVE);
            for (int i = 0; i < cfs1.length; ++i) {
                assertEquals(vals[i], coefs.get(cfs1[i]), CD ? 1e-1 : 1e-4);
            }
            assertEquals(402, GLMTest.nullDeviance(model), 1e-1);
            assertEquals(302.9, GLMTest.residualDeviance(model), 1e-1);
            assertEquals(290, GLMTest.nullDOF(model), 0);
            assertEquals(282, GLMTest.resDOF(model), 0);
            assertEquals(318.9, GLMTest.aic(model), 1e-1);
            System.out.println("VAL METRICS: " + model._output._validation_metrics);
            // compare validation res dev matches R
            // sum(binomial()$dev.resids(y=test$CAPSULE,mu=p,wt=1))
            // [1]80.92923
            // One-sided check: the validation residual deviance may be at most 1e-2 above the R value.
            assertTrue(80.92923 >= GLMTest.residualDevianceTest(model) - 1e-2);
            //      compare validation null dev against R
            //      sum(binomial()$dev.resids(y=test$CAPSULE,mu=.5,wt=1))
            //      [1] 124.7665
            assertEquals(124.7665, GLMTest.nullDevianceTest(model), 1e-4);
            // test scoring; the model is deleted only in the finally block below, after it has
            // been scored (previously it was deleted here and then used again).
            scoreTrain = model.score(_prostateTrain);
            hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(model, _prostateTrain);
            hex.AUC2 adata = mm._auc;
            assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
            assertEquals(model._output._training_metrics._MSE, mm._MSE, 1e-8);
            assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);
            scoreTest = model.score(_prostateTest);
            mm = hex.ModelMetricsBinomial.getFromDKV(model, _prostateTest);
            adata = mm._auc;
            assertEquals(model._output._validation_metrics.auc_obj()._auc, adata._auc, 1e-8);
            assertEquals(model._output._validation_metrics._MSE, mm._MSE, 1e-8);
            assertEquals(((ModelMetricsBinomialGLM) model._output._validation_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);
        } finally {
            if (model != null) {
                model.delete();
            }
            if (scoreTrain != null) {
                scoreTrain.delete();
            }
            if (scoreTest != null) {
                scoreTest.delete();
            }
        }
    }
}
Also used : Solver(hex.glm.GLMModel.GLMParameters.Solver) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) GLMParameters(hex.glm.GLMModel.GLMParameters) Test(org.junit.Test)

Example 2 with ModelMetricsBinomialGLM

use of hex.ModelMetricsBinomialGLM in project h2o-3 by h2oai.

the class GLMBasicTestBinomial method testNoInterceptWithOffsetAndWeights.

/**
 * Fits a binomial GLM without an intercept, using an offset column and observation weights,
 * once per solver, and compares coefficients, deviances, degrees of freedom and AIC with the
 * R reference fit quoted in the method body.
 *
 * <p>For every solver the model is first trained with a validation frame that carries an
 * offset column but no weights column; if that training throws, the exception message is
 * checked. The model is then retrained without a validation frame and re-scored on the
 * training frame.
 */
@Test
public void testNoInterceptWithOffsetAndWeights() {
    Scope.enter();
    double[] offset_train = new double[] { -0.39771185, +1.20479170, -0.16374109, -0.97885903, -1.42996530, +0.83474893, +0.83474893, -0.74488827, +0.83474893, +0.86851236, +1.41589611, +1.41589611, -1.42996530, -0.39771185, -2.01111248, -0.39771185, -0.16374109, +0.62364452, -0.39771185, +0.60262749, -0.06143251, -1.42996530, -0.06143251, -0.06143251, +0.14967191, -0.06143251, -0.39771185, +0.14967191, +1.20479170, -0.39771185, -0.16374109, -0.06143251, -0.06143251, -1.42996530, -0.39771185, -0.39771185, -0.64257969, +1.65774729, -0.97885903, -0.39771185, -0.39771185, -0.39771185, -1.42996530, +1.41589611, -0.06143251, -0.06143251, -0.39771185, -0.06143251, -0.06143251, -0.39771185, -0.06143251, +0.14967191, -0.39771185, -1.42996530, -0.39771185, -0.64257969, -0.39771185, -0.06143251, -0.06143251, -0.06143251, -1.42996530, -2.01111248, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -1.42996530, -0.06143251, +1.41589611, +0.14967191, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -0.06143251, -0.39771185, -0.06143251, -1.42996530, -0.06143251, -0.39771185, -1.42996530, -0.06143251, -0.06143251, -0.06143251, -1.42996530, -0.39771185, -1.42996530, -0.43147527, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -0.43147527, -0.39771185, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -1.42996530, -0.39771185, +0.14967191, +1.41589611, -1.42996530, +1.41589611, -1.42996530, +1.41589611, -0.06143251, +0.14967191, -0.39771185, -0.97885903, -1.42996530, -0.39771185, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -0.97885903, -0.06143251, -0.06143251, +0.86851236, -0.39771185, -0.39771185, -0.06143251, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -1.42996530, -1.42996530, -0.39771185, +1.20479170, -1.42996530, -0.39771185, -0.06143251, -1.42996530, -0.97885903, +0.14967191, +0.14967191, -1.42996530, -1.42996530, -0.39771185, -0.06143251, -0.43147527,
        -0.06143251, -0.39771185, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -0.39771185, -0.06143251, -0.39771185, -0.39771185, +0.14967191, -0.06143251, +1.41589611, -0.06143251, -0.39771185, -0.39771185, -0.06143251, -1.42996530, -0.06143251, -1.42996530, -0.39771185, -0.64257969, -0.06143251, +1.20479170, -0.43147527, -0.97885903, -0.39771185, -0.39771185, -0.39771185, +0.14967191, -2.01111248, -1.42996530, -0.06143251, +0.83474893, -1.42996530, -1.42996530, -2.01111248, -1.42996530, -0.06143251, +0.86851236, +0.05524374, -0.39771185, -0.39771185, -0.39771185, +1.41589611, -1.42996530, -0.39771185, -1.42996530, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -1.42996530, -0.39771185, -1.42996530, -1.42996530, -0.39771185, -0.39771185, -0.06143251, -1.42996530, -0.97885903, -1.42996530, -0.39771185, -0.06143251, -0.39771185, -0.06143251, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -0.39771185, +0.14967191, -0.06143251, -1.42996530, -1.42996530, +0.14967191, -0.39771185, -0.39771185, -1.42996530, -0.06143251, -0.06143251, -1.42996530, -0.06143251, -1.42996530, +0.14967191, +1.20479170, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -0.06143251, -1.42996530, -1.42996530, -1.42996530, -0.39771185, -0.39771185, -0.39771185, +0.86851236, -0.06143251, -0.97885903, -0.06143251, -0.64257969, +0.14967191, +0.86851236, -0.39771185, -0.39771185, -0.39771185, -0.64257969, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -0.06143251, +0.14967191, -0.06143251, +0.86851236, -0.97885903, -1.42996530, -1.42996530, -1.42996530, -1.42996530, +0.86851236, +0.14967191, -1.42996530, -0.97885903, -1.42996530, -1.42996530, -0.06143251, +0.14967191, -1.42996530, -0.64257969, -2.01111248, -0.97885903, -0.39771185 };
    double[] offset_test = new double[] { +1.65774729, -0.97700971, -0.97700971, -0.97700971, +0.05524374, +0.05524374, +0.05524374, +0.05524374, +0.39152308, +0.39152308, +0.39152308, +0.05524374, +0.05524374, +0.05524374, +0.39152308, -0.97700971, +0.05524374, +1.32146795, +0.39152308, +1.65774729, -0.97700971, +1.65774729, +0.39152308, +0.39152308, +1.65774729, +0.60262749, +0.05524374, +0.05524374, +0.05524374, +0.60262749, +0.05524374, -0.97700971, -0.97885903, +0.05524374, -2.01111248, -0.97700971, +0.05524374, +0.39152308, +0.05524374, +0.60262749, +0.60262749, +0.39152308, +0.60262749, -0.97700971, +0.39152308, +1.65774729, +0.39152308, +0.39152308, +0.05524374, +1.86885170, +0.05524374, -0.97700971, +0.60262749, -0.97700971, +0.60262749, -0.97700971, +0.39152308, -0.97700971, -0.43147527, +1.32146795, +0.05524374, +0.05524374, +0.39152308, +0.39152308, +0.05524374, +0.39152308, -0.97700971, +0.05524374, +0.39152308, +0.05524374, +0.60262749, +1.86885170, +0.05524374, +0.05524374, +1.86885170, +0.60262749, -0.64257969, -0.97700971, +0.60262749, +0.39152308, -0.97700971, -0.97700971, +0.05524374, -0.97700971, -0.97700971, +0.05524374, +0.05524374, +0.60262749, +0.05524374, +0.05524374 };
    // random observation weights, integers in 0 - 9 range
    double[] weights_train = new double[] { 0, 6, 5, 4, 4, 8, 2, 4, 9, 5, 2, 0, 0, 4, 0, 0, 6, 3, 6, 5, 5, 5, 6, 0, 9, 9, 8, 6, 6, 5, 6, 1, 0, 6, 8, 6, 9, 2, 8, 0, 3, 0, 2, 3, 0, 2, 5, 0, 0, 3, 7, 4, 8, 4, 1, 9, 3, 7, 1, 3, 8, 6, 9, 5, 5, 1, 9, 5, 2, 1, 0, 6, 4, 0, 5, 3, 1, 2, 4, 0, 7, 9, 6, 8, 0, 2, 3, 7, 5, 8, 3, 4, 7, 8, 1, 2, 5, 7, 3, 7, 1, 1, 5, 7, 4, 9, 2, 6, 3, 5, 4, 9, 8, 1, 8, 5, 3, 0, 4, 5, 1, 2, 2, 7, 8, 3, 4, 9, 0, 1, 3, 9, 8, 7, 0, 8, 2, 7, 1, 9, 0, 7, 7, 5, 2, 9, 7, 6, 4, 3, 4, 6, 9, 1, 5, 0, 7, 9, 4, 1, 6, 8, 8, 5, 4, 2, 5, 9, 8, 1, 9, 2, 9, 2, 3, 0, 6, 7, 3, 2, 3, 0, 9, 5, 1, 8, 0, 2, 8, 6, 9, 5, 1, 2, 3, 1, 3, 5, 0, 7, 4, 0, 5, 5, 7, 9, 3, 0, 0, 0, 1, 5, 3, 2, 8, 9, 9, 1, 6, 2, 2, 0, 5, 5, 6, 2, 8, 8, 9, 8, 5, 0, 1, 5, 3, 0, 2, 5, 4, 0, 6, 5, 4, 5, 9, 7, 5, 6, 2, 2, 6, 2, 5, 1, 5, 9, 0, 3, 0, 2, 7, 0, 4, 7, 7, 9, 3, 7, 9, 7, 9, 6, 2, 6, 2, 2, 9, 0, 9, 8, 1, 2, 6, 3, 4, 1, 2, 2, 3, 0 };
    // Materialize the literal arrays as Vecs, each created from an existing Vec of the frame it will join.
    Vec offsetVecTrain = _prostateTrain.anyVec().makeZero();
    try (Vec.Writer vw = offsetVecTrain.open()) {
        for (int i = 0; i < offset_train.length; ++i) {
            vw.set(i, offset_train[i]);
        }
    }
    Vec weightsVecTrain = _prostateTrain.anyVec().makeZero();
    try (Vec.Writer vw = weightsVecTrain.open()) {
        for (int i = 0; i < weights_train.length; ++i) {
            vw.set(i, weights_train[i]);
        }
    }
    Vec offsetVecTest = _prostateTest.anyVec().makeZero();
    try (Vec.Writer vw = offsetVecTest.open()) {
        for (int i = 0; i < offset_test.length; ++i) {
            vw.set(i, offset_test[i]);
        }
    }
    // Training frame: offset + weights + all prostate training columns.
    Frame fTrain = new Frame(Key.<Frame>make("prostate_with_offset_train"), new String[] { "offset", "weights" }, new Vec[] { offsetVecTrain, weightsVecTrain });
    fTrain.add(_prostateTrain.names(), _prostateTrain.vecs());
    DKV.put(fTrain);
    // Test frame: offset + all prostate test columns; no weights column is added here.
    Frame fTest = new Frame(Key.<Frame>make("prostate_with_offset_test"), new String[] { "offset" }, new Vec[] { offsetVecTest });
    fTest.add(_prostateTest.names(), _prostateTest.vecs());
    DKV.put(fTest);
    //    Call:  glm(formula = CAPSULE ~ . - ID - RACE - DCAPS - DPROS - 1, family = binomial,
    //      data = train, weights = w, offset = offset_train)
    //
    //    Coefficients:
    //    AGE       PSA        VOL        GLEASON
    //   -0.070637  0.034939  -0.006326   0.645700
    //
    //    Degrees of Freedom: 252 Total (i.e. Null);  248 Residual
    //    Null Deviance:	    1494
    //    Residual Deviance: 1235 	AIC: 1243
    String[] cfs1 = new String[] { "Intercept", "AGE", "PSA", "VOL", "GLEASON" };
    double[] vals = new double[] { 0, -0.070637, 0.034939, -0.006326, 0.645700 };
    GLMParameters params = new GLMParameters(Family.binomial);
    params._response_column = "CAPSULE";
    params._ignored_columns = new String[] { "ID", "RACE", "DPROS", "DCAPS" };
    params._train = fTrain._key;
    params._offset_column = "offset";
    params._weights_column = "weights";
    params._lambda = new double[] { 0 };
    params._alpha = new double[] { 0 };
    params._standardize = false;
    params._objective_epsilon = 0;
    params._gradient_epsilon = 1e-6;
    // not expected to reach max iterations here
    params._max_iterations = 100;
    params._intercept = false;
    params._beta_epsilon = 1e-6;
    try {
        for (Solver s : new Solver[] { Solver.IRLSM, Solver.L_BFGS, Solver.COORDINATE_DESCENT }) {
            // Declared per solver so that a failed training run never re-deletes the model
            // left over from the previous iteration.
            GLMModel model = null;
            Frame scoreTrain = null, scoreTest = null;
            try {
                params._solver = s;
                params._valid = fTest._key;
                System.out.println("SOLVER = " + s);
                // First attempt uses the weight-less test frame as validation data. If training
                // throws, the message must name the missing weights column; a successful run is
                // tolerated and its model is replaced by the retraining below.
                try {
                    model = new GLM(params, Key.<GLMModel>make("prostate_model")).trainModel().get();
                } catch (Exception e) {
                    assertTrue(e.getMessage().contains("Test/Validation dataset is missing weights column"));
                }
                params._valid = null;
                model = new GLM(params, Key.<GLMModel>make("prostate_model")).trainModel().get();
                HashMap<String, Double> coefs = model.coefficients();
                System.out.println("coefs = " + coefs);
                // Coordinate descent is compared with a looser coefficient tolerance.
                boolean CD = s == Solver.COORDINATE_DESCENT;
                for (int i = 0; i < cfs1.length; ++i) {
                    assertEquals(vals[i], coefs.get(cfs1[i]), CD ? 1e-2 : 1e-4);
                }
                assertEquals(1494, GLMTest.nullDeviance(model), 1);
                assertEquals(1235, GLMTest.residualDeviance(model), 1);
                assertEquals(252, GLMTest.nullDOF(model), 0);
                assertEquals(248, GLMTest.resDOF(model), 0);
                assertEquals(1243, GLMTest.aic(model), 1);
                // test scoring: the plain training frame has neither the offset nor the weights
                // column added above, so scoring it must be rejected.
                try {
                    scoreTrain = model.score(_prostateTrain);
                    assertTrue("should've thrown IAE", false);
                } catch (IllegalArgumentException iae) {
                    assertTrue(iae.getMessage().contains("Test/Validation dataset is missing"));
                }
                // Metrics stored for the training frame must match the model's training metrics
                // both before and after an explicit re-score.
                hex.ModelMetricsBinomialGLM mmTrain = (ModelMetricsBinomialGLM) hex.ModelMetricsBinomial.getFromDKV(model, fTrain);
                hex.AUC2 adata = mmTrain._auc;
                assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
                assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
                assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, mmTrain._resDev, 1e-8);
                scoreTrain = model.score(fTrain);
                mmTrain = (ModelMetricsBinomialGLM) hex.ModelMetricsBinomial.getFromDKV(model, fTrain);
                adata = mmTrain._auc;
                assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
                assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
                assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, mmTrain._resDev, 1e-8);
            // Disabled validation-frame checks, kept for reference:
            //          scoreTest = model.score(fTest);
            //          ModelMetricsBinomialGLM mmTest = (ModelMetricsBinomialGLM)hex.ModelMetricsBinomial.getFromDKV(model, fTest);
            //          adata = mmTest._auc;
            //          assertEquals(model._output._validation_metrics.auc()._auc, adata._auc, 1e-8);
            //          assertEquals(model._output._validation_metrics._MSE, mmTest._MSE, 1e-8);
            //          assertEquals(((ModelMetricsBinomialGLM) model._output._validation_metrics)._resDev, mmTest._resDev, 1e-8);
            //          // test the actual predictions
            //          Vec preds = scoreTest.vec("p1");
            //          for(int i = 0; i < pred_test.length; ++i)
            //            assertEquals(pred_test[i],preds.at(i),1e-6);
            } finally {
                if (model != null) {
                    model.delete();
                }
                if (scoreTrain != null) {
                    scoreTrain.delete();
                }
                if (scoreTest != null) {
                    scoreTest.delete();
                }
            }
        }
    } finally {
        // Only the frame headers are removed here; the added offset/weights Vecs are
        // presumably reclaimed by Scope.exit() - TODO confirm.
        DKV.remove(fTrain._key);
        DKV.remove(fTest._key);
        Scope.exit();
    }
}
Also used : Solver(hex.glm.GLMModel.GLMParameters.Solver) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) H2OModelBuilderIllegalArgumentException(water.exceptions.H2OModelBuilderIllegalArgumentException) GLMParameters(hex.glm.GLMModel.GLMParameters) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) H2OModelBuilderIllegalArgumentException(water.exceptions.H2OModelBuilderIllegalArgumentException) Test(org.junit.Test)

Example 3 with ModelMetricsBinomialGLM

use of hex.ModelMetricsBinomialGLM in project h2o-3 by h2oai.

the class GLMBasicTestBinomial method testNoInterceptWithOffset.

/**
 * Fits a binomial GLM without an intercept and with an offset column, once per solver, and
 * compares coefficients, deviances, degrees of freedom and AIC with the R reference fit
 * quoted in the method body. The model is then re-scored on the offset-augmented training
 * and test frames, the stored metrics are compared with the model's own metrics, and the
 * predicted {@code p1} values on the test frame are compared with {@code pred_test}.
 */
@Test
public void testNoInterceptWithOffset() {
    double[] offset_train = new double[] { -0.39771185, +1.20479170, -0.16374109, -0.97885903, -1.42996530, +0.83474893, +0.83474893, -0.74488827, +0.83474893, +0.86851236, +1.41589611, +1.41589611, -1.42996530, -0.39771185, -2.01111248, -0.39771185, -0.16374109, +0.62364452, -0.39771185, +0.60262749, -0.06143251, -1.42996530, -0.06143251, -0.06143251, +0.14967191, -0.06143251, -0.39771185, +0.14967191, +1.20479170, -0.39771185, -0.16374109, -0.06143251, -0.06143251, -1.42996530, -0.39771185, -0.39771185, -0.64257969, +1.65774729, -0.97885903, -0.39771185, -0.39771185, -0.39771185, -1.42996530, +1.41589611, -0.06143251, -0.06143251, -0.39771185, -0.06143251, -0.06143251, -0.39771185, -0.06143251, +0.14967191, -0.39771185, -1.42996530, -0.39771185, -0.64257969, -0.39771185, -0.06143251, -0.06143251, -0.06143251, -1.42996530, -2.01111248, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -1.42996530, -0.06143251, +1.41589611, +0.14967191, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -0.06143251, -0.39771185, -0.06143251, -1.42996530, -0.06143251, -0.39771185, -1.42996530, -0.06143251, -0.06143251, -0.06143251, -1.42996530, -0.39771185, -1.42996530, -0.43147527, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -0.43147527, -0.39771185, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -1.42996530, -0.39771185, +0.14967191, +1.41589611, -1.42996530, +1.41589611, -1.42996530, +1.41589611, -0.06143251, +0.14967191, -0.39771185, -0.97885903, -1.42996530, -0.39771185, -0.39771185, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -0.97885903, -0.06143251, -0.06143251, +0.86851236, -0.39771185, -0.39771185, -0.06143251, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -1.42996530, -1.42996530, -0.39771185, +1.20479170, -1.42996530, -0.39771185, -0.06143251, -1.42996530, -0.97885903, +0.14967191, +0.14967191, -1.42996530, -1.42996530, -0.39771185, -0.06143251, -0.43147527,
        -0.06143251, -0.39771185, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -0.39771185, -0.39771185, -0.06143251, -0.39771185, -0.39771185, +0.14967191, -0.06143251, +1.41589611, -0.06143251, -0.39771185, -0.39771185, -0.06143251, -1.42996530, -0.06143251, -1.42996530, -0.39771185, -0.64257969, -0.06143251, +1.20479170, -0.43147527, -0.97885903, -0.39771185, -0.39771185, -0.39771185, +0.14967191, -2.01111248, -1.42996530, -0.06143251, +0.83474893, -1.42996530, -1.42996530, -2.01111248, -1.42996530, -0.06143251, +0.86851236, +0.05524374, -0.39771185, -0.39771185, -0.39771185, +1.41589611, -1.42996530, -0.39771185, -1.42996530, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -1.42996530, -0.39771185, -1.42996530, -1.42996530, -0.39771185, -0.39771185, -0.06143251, -1.42996530, -0.97885903, -1.42996530, -0.39771185, -0.06143251, -0.39771185, -0.06143251, -1.42996530, -1.42996530, -0.06143251, -1.42996530, -0.39771185, +0.14967191, -0.06143251, -1.42996530, -1.42996530, +0.14967191, -0.39771185, -0.39771185, -1.42996530, -0.06143251, -0.06143251, -1.42996530, -0.06143251, -1.42996530, +0.14967191, +1.20479170, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -0.06143251, +0.14967191, -0.06143251, -1.42996530, -1.42996530, -1.42996530, -0.39771185, -0.39771185, -0.39771185, +0.86851236, -0.06143251, -0.97885903, -0.06143251, -0.64257969, +0.14967191, +0.86851236, -0.39771185, -0.39771185, -0.39771185, -0.64257969, -1.42996530, -0.06143251, -0.39771185, -0.39771185, -1.42996530, -1.42996530, -0.06143251, +0.14967191, -0.06143251, +0.86851236, -0.97885903, -1.42996530, -1.42996530, -1.42996530, -1.42996530, +0.86851236, +0.14967191, -1.42996530, -0.97885903, -1.42996530, -1.42996530, -0.06143251, +0.14967191, -1.42996530, -0.64257969, -2.01111248, -0.97885903, -0.39771185 };
    double[] offset_test = new double[] { +1.65774729, -0.97700971, -0.97700971, -0.97700971, +0.05524374, +0.05524374, +0.05524374, +0.05524374, +0.39152308, +0.39152308, +0.39152308, +0.05524374, +0.05524374, +0.05524374, +0.39152308, -0.97700971, +0.05524374, +1.32146795, +0.39152308, +1.65774729, -0.97700971, +1.65774729, +0.39152308, +0.39152308, +1.65774729, +0.60262749, +0.05524374, +0.05524374, +0.05524374, +0.60262749, +0.05524374, -0.97700971, -0.97885903, +0.05524374, -2.01111248, -0.97700971, +0.05524374, +0.39152308, +0.05524374, +0.60262749, +0.60262749, +0.39152308, +0.60262749, -0.97700971, +0.39152308, +1.65774729, +0.39152308, +0.39152308, +0.05524374, +1.86885170, +0.05524374, -0.97700971, +0.60262749, -0.97700971, +0.60262749, -0.97700971, +0.39152308, -0.97700971, -0.43147527, +1.32146795, +0.05524374, +0.05524374, +0.39152308, +0.39152308, +0.05524374, +0.39152308, -0.97700971, +0.05524374, +0.39152308, +0.05524374, +0.60262749, +1.86885170, +0.05524374, +0.05524374, +1.86885170, +0.60262749, -0.64257969, -0.97700971, +0.60262749, +0.39152308, -0.97700971, -0.97700971, +0.05524374, -0.97700971, -0.97700971, +0.05524374, +0.05524374, +0.60262749, +0.05524374, +0.05524374 };
    double[] pred_test = new double[] { +0.88475366, +0.23100271, +0.40966315, +0.08957188, +0.47333302, +0.44622513, +0.56450046, +0.74271010, +0.45129280, +0.72359111, +0.67918401, +0.19882802, +0.42330391, +0.62734862, +0.38055506, +0.47286476, +0.40180469, +0.97907526, +0.61428344, +0.97109299, +0.30489181, +0.81303545, +0.36130639, +0.65434899, +0.98863675, +0.58301866, +0.37950467, +0.53679205, +0.30636941, +0.70320372, +0.45303278, +0.35011042, +0.78165074, +0.44915160, +0.09008065, +0.16789833, +0.45748862, +0.59328118, +0.75002334, +0.35170410, +0.57550279, +0.42038237, +0.76349569, +0.28883753, +0.84824847, +0.72396381, +0.56782477, +0.54078190, +0.51169047, +0.80828547, +0.52001699, +0.26202346, +0.81014557, +0.29986016, +0.62011569, +0.33034872, +0.62284802, +0.28303618, +0.38470707, +0.96444405, +0.36155179, +0.46368503, +0.65192144, +0.43597041, +0.30906461, +0.69259415, +0.21819579, +0.49998652, +0.57162728, +0.44255738, +0.80820564, +0.90616782, +0.49377901, +0.34235025, +0.99621673, +0.65768252, +0.43909050, +0.23205826, +0.71124897, +0.42908417, +0.47880901, +0.29185818, +0.42648317, +0.01247279, +0.18372518, +0.27281535, +0.63807876, +0.44563524, +0.32821696, +0.43636099 };
    // Materialize the literal offsets as Vecs, each created from an existing Vec of the frame it will join.
    Vec offsetVecTrain = _prostateTrain.anyVec().makeZero();
    try (Vec.Writer vw = offsetVecTrain.open()) {
        for (int i = 0; i < offset_train.length; ++i) {
            vw.set(i, offset_train[i]);
        }
    }
    Vec offsetVecTest = _prostateTest.anyVec().makeZero();
    try (Vec.Writer vw = offsetVecTest.open()) {
        for (int i = 0; i < offset_test.length; ++i) {
            vw.set(i, offset_test[i]);
        }
    }
    // Typed keys (instead of raw Key) so the frame/params assignments below are checked by the compiler.
    Key<Frame> fKeyTrain = Key.make("prostate_with_offset_train");
    Key<Frame> fKeyTest = Key.make("prostate_with_offset_test");
    Frame fTrain = new Frame(fKeyTrain, new String[] { "offset" }, new Vec[] { offsetVecTrain });
    fTrain.add(_prostateTrain.names(), _prostateTrain.vecs());
    DKV.put(fKeyTrain, fTrain);
    Frame fTest = new Frame(fKeyTest, new String[] { "offset" }, new Vec[] { offsetVecTest });
    fTest.add(_prostateTest.names(), _prostateTest.vecs());
    DKV.put(fKeyTest, fTest);
    //    Call:  glm(formula = CAPSULE ~ . - ID - RACE - DCAPS - DPROS - 1, family = binomial,
    //      data = train, offset = offset_train)
    //
    //    Coefficients:
    //     AGE        PSA        VOL        GLEASON
    //    -0.054102   0.027517  -0.008937   0.516363
    //
    //    Degrees of Freedom: 290 Total (i.e. Null);  286 Residual
    //    Null Deviance:	    355.7
    //    Residual Deviance: 313 	AIC: 321
    String[] cfs1 = new String[] { "Intercept", "AGE", "PSA", "VOL", "GLEASON" };
    double[] vals = new double[] { 0, -0.054102, 0.027517, -0.008937, 0.516363 };
    GLMParameters params = new GLMParameters(Family.binomial);
    params._response_column = "CAPSULE";
    params._ignored_columns = new String[] { "ID", "RACE", "DPROS", "DCAPS" };
    params._train = fKeyTrain;
    params._valid = fKeyTest;
    params._offset_column = "offset";
    params._lambda = new double[] { 0 };
    params._alpha = new double[] { 0 };
    params._standardize = false;
    params._objective_epsilon = 0;
    params._gradient_epsilon = 1e-6;
    // not expected to reach max iterations here
    params._max_iterations = 100;
    params._intercept = false;
    params._beta_epsilon = 1e-6;
    params._missing_values_handling = MissingValuesHandling.Skip;
    try {
        for (Solver s : new Solver[] { Solver.AUTO, Solver.IRLSM, Solver.L_BFGS, Solver.COORDINATE_DESCENT }) {
            // Declared per solver so that a failed training run never re-deletes the model
            // left over from the previous iteration.
            GLMModel model = null;
            Frame scoreTrain = null, scoreTest = null;
            try {
                params._solver = s;
                System.out.println("SOLVER = " + s);
                model = new GLM(params).trainModel().get();
                HashMap<String, Double> coefs = model.coefficients();
                System.out.println("coefs = " + coefs);
                // Coordinate descent is compared with looser tolerances throughout.
                boolean CD = s == Solver.COORDINATE_DESCENT;
                for (int i = 0; i < cfs1.length; ++i) {
                    assertEquals(vals[i], coefs.get(cfs1[i]), CD ? 1e-2 : 1e-4);
                }
                assertEquals(355.7, GLMTest.nullDeviance(model), 1e-1);
                assertEquals(313.0, GLMTest.residualDeviance(model), 1e-1);
                assertEquals(290, GLMTest.nullDOF(model), 0);
                assertEquals(286, GLMTest.resDOF(model), 0);
                assertEquals(321, GLMTest.aic(model), 1e-1);
                assertEquals(88.72363, GLMTest.residualDevianceTest(model), CD ? 1e-2 : 1e-4);
                // test scoring: the plain training frame lacks the offset column added above,
                // so scoring it must be rejected.
                try {
                    scoreTrain = model.score(_prostateTrain);
                    assertTrue("should've thrown IAE", false);
                } catch (IllegalArgumentException iae) {
                    assertTrue(iae.getMessage().contains("Test/Validation dataset is missing offset column"));
                }
                // Metrics stored for the training frame must match the model's training metrics
                // both before and after an explicit re-score.
                hex.ModelMetricsBinomialGLM mmTrain = (ModelMetricsBinomialGLM) hex.ModelMetricsBinomial.getFromDKV(model, fTrain);
                hex.AUC2 adata = mmTrain._auc;
                assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
                assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
                assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, mmTrain._resDev, 1e-8);
                scoreTrain = model.score(fTrain);
                mmTrain = (ModelMetricsBinomialGLM) hex.ModelMetricsBinomial.getFromDKV(model, fTrain);
                adata = mmTrain._auc;
                assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
                assertEquals(model._output._training_metrics._MSE, mmTrain._MSE, 1e-8);
                assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, mmTrain._resDev, 1e-8);
                scoreTest = model.score(fTest);
                ModelMetricsBinomialGLM mmTest = (ModelMetricsBinomialGLM) hex.ModelMetricsBinomial.getFromDKV(model, fTest);
                adata = mmTest._auc;
                assertEquals(model._output._validation_metrics.auc_obj()._auc, adata._auc, 1e-8);
                assertEquals(model._output._validation_metrics._MSE, mmTest._MSE, 1e-8);
                assertEquals(((ModelMetricsBinomialGLM) model._output._validation_metrics)._resDev, mmTest._resDev, 1e-8);
                GLMTest.testScoring(model, fTest);
                // test the actual predictions
                Vec.Reader preds = scoreTest.vec("p1").new Reader();
                for (int i = 0; i < pred_test.length; ++i) {
                    assertEquals(pred_test[i], preds.at(i), CD ? 1e-3 : 1e-6);
                }
            } finally {
                if (model != null) {
                    model.delete();
                }
                if (scoreTrain != null) {
                    scoreTrain.delete();
                }
                if (scoreTest != null) {
                    scoreTest.delete();
                }
            }
        }
    } finally {
        // Remove only what this test created: the added offset Vec and the frame header.
        // The remaining columns belong to the shared prostate frames and are left alone.
        fTrain.remove("offset").remove();
        DKV.remove(fTrain._key);
        fTest.remove("offset").remove();
        DKV.remove(fTest._key);
    }
}
Also used : Solver(hex.glm.GLMModel.GLMParameters.Solver) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) GLMParameters(hex.glm.GLMModel.GLMParameters) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) H2OModelBuilderIllegalArgumentException(water.exceptions.H2OModelBuilderIllegalArgumentException) Test(org.junit.Test)

Example 4 with ModelMetricsBinomialGLM

use of hex.ModelMetricsBinomialGLM in project h2o-3 by h2oai.

the class GLMBasicTestBinomial method testNonNegative.

/**
 * Trains a binomial GLM with an intercept and {@code _non_negative = true} on the
 * prostate training frame, once per solver (IRLSM, L_BFGS, COORDINATE_DESCENT).
 *
 * <p>For each solver the test checks the null and residual deviance against hard-coded
 * reference values, then scores the training frame and verifies that the AUC, MSE and
 * residual deviance of the metrics fetched from the DKV match the model's training
 * metrics to within 1e-8.
 */
@Test
public void testNonNegative() {
    //   glmnet result
    //    (Intercept)         AGE      RACER1      RACER2      RACER3      DPROSb
    //    -7.85142421  0.00000000  0.76094020  0.87641840  0.00000000  0.93030614
    //    DPROSc      DPROSd      DCAPSb         PSA         VOL     GLEASON
    //    1.31814009  0.82918839  0.63285077  0.02949062  0.00000000  0.83011321
    // cfs1/vals are only referenced by the disabled coefficient check inside the loop;
    // they are kept so that check can be re-enabled without re-deriving the values.
    String[] cfs1 = new String[] { "Intercept", "AGE", "DPROS.b", "DPROS.c", "DPROS.d", "DCAPS.b", "PSA", "VOL", "GLEASON" };
    double[] vals = new double[] { -7.85142421, 0.0, 0.93030614, 1.31814009, 0.82918839, 0.63285077, 0.02949062, 0.0, 0.83011321 };
    GLMParameters params = new GLMParameters(Family.binomial);
    params._response_column = "CAPSULE";
    params._ignored_columns = new String[] { "ID" };
    params._train = _prostateTrain._key;
    params._lambda = new double[] { 0 };
    params._alpha = new double[] { 0 };
    params._standardize = false;
    params._non_negative = true;
    params._intercept = true;
    params._objective_epsilon = 1e-10;
    params._gradient_epsilon = 1e-6;
    // not expected to reach max iterations here
    params._max_iterations = 10000;
    for (Solver s : new Solver[] { Solver.IRLSM, Solver.L_BFGS, Solver.COORDINATE_DESCENT }) {
        // Scoped per iteration so a training failure never re-deletes the previous solver's model.
        GLMModel model = null;
        Frame scoreTrain = null;
        try {
            params._solver = s;
            System.out.println("SOLVER = " + s);
            model = new GLM(params).trainModel().get();
            HashMap<String, Double> coefs = model.coefficients();
            System.out.println("coefs = " + coefs.toString());
            System.out.println("metrics = " + model._output._training_metrics);
            //        for (int i = 0; i < cfs1.length; ++i)
            //          assertEquals(vals[i], coefs.get(cfs1[i]), Math.abs(5e-1 * vals[i]));
            assertEquals(390.3468, GLMTest.nullDeviance(model), 1e-4);
            // Residual deviance is only required to be within 3 of the reference value.
            assertEquals(300.7231, GLMTest.residualDeviance(model), 3);
            System.out.println("VAL METRICS: " + model._output._validation_metrics);
            // NOTE(review): the model is deleted here and the in-memory object is still
            // scored below. Presumably intentional -- confirm before changing.
            model.delete();
            // test scoring
            scoreTrain = model.score(_prostateTrain);
            hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(model, _prostateTrain);
            hex.AUC2 adata = mm._auc;
            assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
            assertEquals(model._output._training_metrics._MSE, mm._MSE, 1e-8);
            assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);
        } finally {
            if (model != null) {
                model.delete();
            }
            if (scoreTrain != null) {
                scoreTrain.delete();
            }
        }
    }
}
Also used : Solver(hex.glm.GLMModel.GLMParameters.Solver) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) GLMParameters(hex.glm.GLMModel.GLMParameters) Test(org.junit.Test)

Example 5 with ModelMetricsBinomialGLM

use of hex.ModelMetricsBinomialGLM in project h2o-3 by h2oai.

the class GLMBasicTestBinomial method testNonNegativeNoIntercept.

/**
 * Trains a binomial GLM with {@code _non_negative = true} and {@code _intercept = false}
 * on the prostate training frame, once per solver (AUTO, IRLSM, L_BFGS,
 * COORDINATE_DESCENT).
 *
 * <p>For each solver the test compares the fitted coefficients with the glmnet reference
 * values, checks the null and residual deviance against hard-coded reference values, then
 * scores the training frame and verifies that the AUC, MSE and residual deviance of the
 * metrics fetched from the DKV match the model's training metrics to within 1e-8.
 *
 * <p>The whole body runs between {@code Scope.enter()} and a {@code Scope.exit()} placed
 * in a {@code finally} block, so the scope is exited even when an assertion fails.
 */
@Test
public void testNonNegativeNoIntercept() {
    Scope.enter();
    try {
        //   glmnet result
        //    (Intercept)         AGE      RACER1      RACER2      RACER3      DPROSb
        //    0.000000000 0.000000000 0.240953925 0.000000000 0.000000000 0.000000000
        //    DPROSc      DPROSd      DCAPSb         PSA         VOL     GLEASON
        //    0.000000000 0.000000000 0.680406869 0.007137494 0.000000000 0.000000000
        String[] cfs1 = new String[] { "Intercept", "AGE", "DPROS.b", "DPROS.c", "DPROS.d", "DCAPS.b", "PSA", "VOL", "GLEASON", "RACE.R1" };
        double[] vals = new double[] { 0.0, 0.0, 0.0, 0, 0.0, 0.680406869, 0.007137494, 0.0, 0.0, 0.240953925 };
        GLMParameters params = new GLMParameters(Family.binomial);
        params._response_column = "CAPSULE";
        params._ignored_columns = new String[] { "ID" };
        params._train = _prostateTrain._key;
        params._lambda = new double[] { 0 };
        params._alpha = new double[] { 0 };
        params._standardize = false;
        params._non_negative = true;
        params._intercept = false;
        params._objective_epsilon = 1e-6;
        params._gradient_epsilon = 1e-5;
        // not expected to reach max iterations here
        params._max_iterations = 500;
        for (Solver s : new Solver[] { Solver.AUTO, Solver.IRLSM, Solver.L_BFGS, Solver.COORDINATE_DESCENT }) {
            // Scoped per iteration so a training failure never re-deletes the previous solver's model.
            GLMModel model = null;
            Frame scoreTrain = null;
            try {
                params._solver = s;
                System.out.println("SOLVER = " + s);
                model = new GLM(params).trainModel().get();
                HashMap<String, Double> coefs = model.coefficients();
                System.out.println("coefs = " + coefs.toString());
                System.out.println("metrics = " + model._output._training_metrics);
                // IRLSM is held to a 10x tighter coefficient tolerance than the other solvers.
                double relTol = s == Solver.IRLSM ? 1e-1 : 1;
                for (int i = 0; i < cfs1.length; ++i) {
                    // The 1e-1 offset keeps the tolerance non-zero for reference values of 0.
                    assertEquals(vals[i], coefs.get(cfs1[i]), relTol * (vals[i] + 1e-1));
                }
                assertEquals(402.0254, GLMTest.nullDeviance(model), 1e-1);
                // L_BFGS is allowed a much looser residual-deviance tolerance (50 vs 1).
                assertEquals(394.3998, GLMTest.residualDeviance(model), s == Solver.L_BFGS ? 50 : 1);
                System.out.println("VAL METRICS: " + model._output._validation_metrics);
                // NOTE(review): the model is deleted here and the in-memory object is still
                // scored below. Presumably intentional -- confirm before changing.
                model.delete();
                // test scoring
                scoreTrain = model.score(_prostateTrain);
                hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(model, _prostateTrain);
                hex.AUC2 adata = mm._auc;
                assertEquals(model._output._training_metrics.auc_obj()._auc, adata._auc, 1e-8);
                assertEquals(model._output._training_metrics._MSE, mm._MSE, 1e-8);
                assertEquals(((ModelMetricsBinomialGLM) model._output._training_metrics)._resDev, ((ModelMetricsBinomialGLM) mm)._resDev, 1e-8);
            } finally {
                if (model != null) {
                    model.delete();
                }
                if (scoreTrain != null) {
                    scoreTrain.delete();
                }
            }
        }
    } finally {
        // Always pair Scope.enter() with Scope.exit(), even when an assertion above fails.
        Scope.exit();
    }
}
Also used : Solver(hex.glm.GLMModel.GLMParameters.Solver) ModelMetricsBinomialGLM(hex.ModelMetricsBinomialGLM) GLMParameters(hex.glm.GLMModel.GLMParameters) Test(org.junit.Test)

Aggregations

ModelMetricsBinomialGLM (hex.ModelMetricsBinomialGLM): 7, GLMParameters (hex.glm.GLMModel.GLMParameters): 7, Solver (hex.glm.GLMModel.GLMParameters.Solver): 7, Test (org.junit.Test): 7, H2OModelBuilderIllegalArgumentException (water.exceptions.H2OModelBuilderIllegalArgumentException): 3