Search in sources :

Example 41 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testSynthetic.

/**
 * Trains a binomial GLM on {@code smalldata/glm_test/glm_test2.csv} (single lambda
 * value of 0, no standardization, at most 20 iterations, column "ID" ignored) and
 * checks that {@code auc(model)} is 1 within 1e-4, and that it agrees within 1e-2
 * with the AUC stored in the binomial metrics looked up for the scored frame.
 *
 * @throws Exception any failure while parsing, training or scoring fails the test
 */
@Test
public void testSynthetic() throws Exception {
    GLMModel model = null;
    Frame fr = parse_test_file("smalldata/glm_test/glm_test2.csv");
    Frame score = null;
    try {
        Scope.enter();
        GLMParameters params = new GLMParameters(Family.binomial);
        params._response_column = "response";
        // params._response = fr.find(params._response_column);
        params._ignored_columns = new String[] { "ID" };
        params._train = fr._key;
        params._lambda = new double[] { 0 };
        params._standardize = false;
        params._max_iterations = 20;
        GLM glm = new GLM(params);
        model = glm.trainModel().get();
        double[] beta = model.beta();
        System.out.println("beta = " + Arrays.toString(beta));
        // JUnit's signature is assertEquals(expected, actual, delta): the constant goes
        // first so that a failure message reports the two values the right way round.
        assertEquals(1.0, auc(model), 1e-4);
        // Scoring happens before the metrics lookup below.
        // NOTE(review): presumably score() is what stores those metrics - confirm.
        score = model.score(fr);
        hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(model, fr);
        hex.AUC2 adata = mm._auc;
        assertEquals(auc(model), adata._auc, 1e-2);
    } finally {
        fr.remove();
        if (model != null)
            model.delete();
        if (score != null)
            score.delete();
        Scope.exit();
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters) hex(hex)

Example 42 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testCars.

//------------ TEST on selected files from small data and compare to R results ------------------------------------
/**
 * Simple test for poisson, gamma and gaussian families (no regularization).
 * Basically tries to predict horse power based on other parameters of the cars in the dataset.
 * Compare against the results from standard R glm implementation.
 *
 * <p>For each family a model is trained on {@code smalldata/junit/cars.csv} with the
 * "name" column ignored and rows with missing values skipped, and its coefficients are
 * compared with hard-coded reference values (tolerance 1e-4). The poisson and gamma
 * models are additionally passed to {@code testScoring}.
 *
 * <p>NOTE(review): an earlier version of this comment said "test both lsm solvers", but
 * no solver is selected explicitly in this method - confirm whether that is still intended.
 *
 * @throws ExecutionException   propagated to JUnit, which then fails the test
 * @throws InterruptedException propagated to JUnit, which then fails the test
 */
@Test
public void testCars() throws InterruptedException, ExecutionException {
    Scope.enter();
    Key parsed = Key.make("cars_parsed");
    Frame fr = null;
    GLMModel model = null;
    try {
        fr = parse_test_file(parsed, "smalldata/junit/cars.csv");
        // Coefficient names shared by all three families; the vlsN arrays are index-aligned with it.
        String[] cfs1 = new String[] { "Intercept", "economy (mpg)", "cylinders", "displacement (cc)", "weight (lb)", "0-60 mph (s)", "year" };

        // test poisson
        double[] vls1 = new double[] { 4.9504805, -0.0095859, -0.0063046, 0.0004392, 0.0001762, -0.0469810, 0.0002891 };
        GLMParameters params = new GLMParameters(Family.poisson, Family.poisson.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
        params._response_column = "power (hp)";
        // params._response = fr.find(params._response_column);
        params._ignored_columns = new String[] { "name" };
        params._train = parsed;
        params._lambda = new double[] { 0 };
        params._alpha = new double[] { 0 };
        params._missing_values_handling = MissingValuesHandling.Skip;
        model = new GLM(params).trainModel().get();
        HashMap<String, Double> coefs = model.coefficients();
        for (int i = 0; i < cfs1.length; ++i) {
            assertEquals(vls1[i], coefs.get(cfs1[i]), 1e-4);
        }
        testScoring(model, fr);
        model.delete();
        // Clear the reference so the finally block does not delete the same model a
        // second time if building the next model throws.
        model = null;

        // test gamma
        double[] vls2 = new double[] { 8.992e-03, 1.818e-04, -1.125e-04, 1.505e-06, -1.284e-06, 4.510e-04, -7.254e-05 };
        params = new GLMParameters(Family.gamma, Family.gamma.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
        params._response_column = "power (hp)";
        // params._response = fr.find(params._response_column);
        params._ignored_columns = new String[] { "name" };
        params._train = parsed;
        params._lambda = new double[] { 0 };
        params._beta_epsilon = 1e-5;
        params._missing_values_handling = MissingValuesHandling.Skip;
        model = new GLM(params).trainModel().get();
        coefs = model.coefficients();
        for (int i = 0; i < cfs1.length; ++i) {
            assertEquals(vls2[i], coefs.get(cfs1[i]), 1e-4);
        }
        testScoring(model, fr);
        model.delete();
        model = null;

        // test gaussian
        double[] vls3 = new double[] { 166.95862, -0.00531, -2.46690, 0.12635, 0.02159, -4.66995, -0.85724 };
        params = new GLMParameters(Family.gaussian);
        params._response_column = "power (hp)";
        // params._response = fr.find(params._response_column);
        params._ignored_columns = new String[] { "name" };
        params._train = parsed;
        params._lambda = new double[] { 0 };
        params._missing_values_handling = MissingValuesHandling.Skip;
        model = new GLM(params).trainModel().get();
        coefs = model.coefficients();
        for (int i = 0; i < cfs1.length; ++i) {
            assertEquals(vls3[i], coefs.get(cfs1[i]), 1e-4);
        }
        // NOTE(review): the original ended with a bare "test scoring" comment here; the
        // gaussian model is not passed to testScoring - confirm whether it should be.
    } finally {
        if (fr != null)
            fr.delete();
        if (model != null)
            model.delete();
        Scope.exit();
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters) BufferedString(water.parser.BufferedString)

Example 43 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testCitibikeReproPUBDEV1953.

/**
 * Trains a poisson GLM with "bikes" as the response on the small citibike training
 * file, using the small citibike test file as the validation frame, and then runs
 * {@code testScoring} against that validation frame.
 *
 * @throws Exception any failure while parsing, training or scoring fails the test
 */
@Test
public void testCitibikeReproPUBDEV1953() throws Exception {
    GLMModel fitted = null;
    Frame trainFrame = parse_test_file("smalldata/glm_test/citibike_small_train.csv");
    Frame validFrame = parse_test_file("smalldata/glm_test/citibike_small_test.csv");
    try {
        Scope.enter();
        GLMParameters glmParams = new GLMParameters(Family.poisson);
        glmParams._family = Family.poisson;
        glmParams._train = trainFrame._key;
        glmParams._valid = validFrame._key;
        glmParams._response_column = "bikes";
        fitted = new GLM(glmParams).trainModel().get();
        testScoring(fitted, validFrame);
    } finally {
        // Both frames were parsed before the try block, so they are never null here.
        trainFrame.remove();
        validFrame.remove();
        if (fitted != null) {
            fitted.delete();
        }
        Scope.exit();
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters)

Example 44 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method test_COD_Airlines_SingleLambda_CovUpdates.

/**
 * Trains a binomial GLM on the airlines training data with the COORDINATE_DESCENT
 * solver (alpha = 1, lambda = 0.01, lambda search enabled, no standardization),
 * using the same frame for training and validation, then computes the L1 and squared
 * L2 norms of the resulting coefficients.
 *
 * <p>The check on the objective value is currently disabled (see the commented-out
 * block below), so this test only verifies that training completes.
 */
@Test
public void test_COD_Airlines_SingleLambda_CovUpdates() {
    GLMModel model1 = null;
    //  Distance + Origin + Dest + UniqueCarrier
    Frame fr = parse_test_file(Key.make("Airlines"), "smalldata/airlines/AirlinesTrain.csv.zip");
    String[] ignoredCols = new String[] { "IsDepDelayed_REC" };
    try {
        Scope.enter();
        GLMParameters params = new GLMParameters(Family.binomial);
        params._response_column = "IsDepDelayed";
        params._ignored_columns = ignoredCols;
        params._train = fr._key;
        params._valid = fr._key;
        //null; //new double[]{0.02934};//{0.02934494}; // null;
        params._lambda = new double[] { 0.01 };
        params._alpha = new double[] { 1 };
        params._standardize = false;
        params._solver = Solver.COORDINATE_DESCENT;
        params._lambda_search = true;
        GLM glm = new GLM(params);
        model1 = glm.trainModel().get();
        // The penalties below are only consumed by the disabled objective check; they are
        // kept so that the check can be re-enabled without further changes.
        double[] beta = model1.beta();
        double l1pen = ArrayUtils.l1norm(beta, true);
        double l2pen = ArrayUtils.l2norm2(beta, true);
    //      double objective = job.likelihood()/model1._nobs +
    //              params._l2pen[params._l2pen.length-1]*params._alpha[0]*l1pen + params._l2pen[params._l2pen.length-1]*(1-params._alpha[0])*l2pen/2  ;
    //      System.out.println( " objective value " + objective);
    //      assertEquals(0.670921, objective,1e-2);
    } finally {
        fr.delete();
        if (model1 != null)
            model1.delete();
        // Balance the Scope.enter() at the top of the try block, as the sibling tests do;
        // without it the scope opened by this test is never closed.
        Scope.exit();
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters) BufferedString(water.parser.BufferedString)

Example 45 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testAirlines.

// test categorical autoexpansions, run on airlines which has several categorical columns,
// once on explicitly expanded data, once on h2o autoexpanded and compare the results
//
// Outline of what the method does:
//   model1 - gaussian GLM trained on the "Airlines" frame (fr)
//   model2 - same parameters, trained on the "AirlinesMM" frame (frMM)
//   model3 - binomial/logit GLM trained on frMM
//   model4 - binomial/logit GLM trained on fr
// Coefficients of model1/model2 and deviances of model1/model2 and model3/model4 are
// compared with each other, and the gram matrix / xy vector computed by GLMIterationTask
// on frMM is compared with the values stored in gram_std.csv (frG).
@Test
public void testAirlines() {
    GLMModel model1 = null, model2 = null, model3 = null, model4 = null;
    Frame frMM = parse_test_file(Key.make("AirlinesMM"), "smalldata/airlines/AirlinesTrainMM.csv.zip");
    Frame frG = parse_test_file(Key.make("gram"), "smalldata/airlines/gram_std.csv", true);
    // The "xy" column is taken out of frG; it is compared with glmt._xy further down,
    // while the columns left in frG are compared with glmt._gram.
    Vec xy = frG.remove("xy");
    // Drop column "C1" from frMM and remove the detached vec.
    frMM.remove("C1").remove();
    Vec v;
    // Take "IsDepDelayed" out of frMM, re-add a copy made with makeCopy(null) under the
    // same name, then remove the original vec.
    frMM.add("IsDepDelayed", (v = frMM.remove("IsDepDelayed")).makeCopy(null));
    v.remove();
    // Store the modified frame back under its key.
    DKV.put(frMM._key, frMM);
    Frame fr = parse_test_file(Key.make("Airlines"), "smalldata/airlines/AirlinesTrain.csv.zip"), res = null;
    // Same "IsDepDelayed" replacement as above, applied to fr.
    fr.add("IsDepDelayed", (v = fr.remove("IsDepDelayed")).makeCopy(null));
    v.remove();
    DKV.put(fr._key, fr);
    //  Distance + Origin + Dest + UniqueCarrier
    String[] ignoredCols = new String[] { "fYear", "fMonth", "fDayofMonth", "fDayOfWeek", "DepTime", "ArrTime", "IsDepDelayed_REC" };
    try {
        Scope.enter();
        // model1: gaussian family on fr, lambda = 0, alpha = 0, no standardization.
        GLMParameters params = new GLMParameters(Family.gaussian);
        params._response_column = "IsDepDelayed";
        params._ignored_columns = ignoredCols;
        params._train = fr._key;
        params._lambda = new double[] { 0 };
        params._alpha = new double[] { 0 };
        params._standardize = false;
        params._use_all_factor_levels = false;
        model1 = new GLM(params).trainModel().get();
        testScoring(model1, fr);
        Frame score1 = model1.score(fr);
        ModelMetricsRegressionGLM mm = (ModelMetricsRegressionGLM) ModelMetrics.getFromDKV(model1, fr);
        // The residual deviance in the training metrics must match both the residual deviance
        // of the looked-up metrics and MSE * number of scored rows.
        Assert.assertEquals(((ModelMetricsRegressionGLM) model1._output._training_metrics)._resDev, mm._resDev, 1e-4);
        Assert.assertEquals(((ModelMetricsRegressionGLM) model1._output._training_metrics)._resDev, mm._MSE * score1.numRows(), 1e-4);
        score1.delete();
        mm.remove();
        // res is not inspected afterwards; it is only deleted in the finally block.
        res = model1.score(fr);
        // model2: the same params object re-pointed at frMM, ignoring only column "X".
        // NOTE(review): the original comment here read "Build a POJO, validate same results",
        // but no POJO is built in the code below - confirm and drop if stale.
        params._train = frMM._key;
        params._ignored_columns = new String[] { "X" };
        model2 = new GLM(params).trainModel().get();
        HashMap<String, Double> coefs1 = model1.coefficients();
        testScoring(model2, frMM);
        HashMap<String, Double> coefs2 = model2.coefficients();
        boolean failed = false;
        // compare against each other
        // A model2 coefficient name starting with "Origin", "Dest" or "UniqueCarrier" is
        // looked up in model1 with a '.' inserted after that prefix; every mismatch is
        // printed before the test fails via assertFalse(failed).
        for (String s : coefs2.keySet()) {
            String s1 = s;
            if (s.startsWith("Origin"))
                s1 = "Origin." + s.substring(6);
            if (s.startsWith("Dest"))
                s1 = "Dest." + s.substring(4);
            if (s.startsWith("UniqueCarrier"))
                s1 = "UniqueCarrier." + s.substring(13);
            if (Math.abs(coefs1.get(s1) - coefs2.get(s)) > 1e-4) {
                System.out.println("coeff " + s1 + " differs, " + coefs1.get(s1) + " != " + coefs2.get(s));
                failed = true;
            }
        //        assertEquals("coeff " + s1 + " differs, " + coefs1.get(s1) + " != " + coefs2.get(s), coefs1.get(s1), coefs2.get(s), 1e-4);
        }
        assertFalse(failed);
        params._standardize = true;
        params._train = frMM._key;
        params._use_all_factor_levels = true;
        // test the gram
        // Only entries with j <= i are compared against frG, together with every entry of xy.
        DataInfo dinfo = new DataInfo(frMM, null, 1, true, DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
        GLMIterationTask glmt = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), null).doAll(dinfo._adaptedFrame);
        for (int i = 0; i < glmt._xy.length; ++i) {
            for (int j = 0; j <= i; ++j) {
                assertEquals(frG.vec(j).at(i), glmt._gram.get(i, j), 1e-5);
            }
            assertEquals(xy.at(i), glmt._xy[i], 1e-5);
        }
        // NOTE(review): xy is only removed here, so it is not cleaned up if anything above throws.
        xy.remove();
        // model3 / model4: binomial family with logit link, built from a clone of params,
        // first on frMM and then on fr (with the original ignored columns restored).
        params = (GLMParameters) params.clone();
        params._standardize = false;
        params._family = Family.binomial;
        params._link = Link.logit;
        model3 = new GLM(params).trainModel().get();
        testScoring(model3, frMM);
        params._train = fr._key;
        params._ignored_columns = ignoredCols;
        model4 = new GLM(params).trainModel().get();
        testScoring(model4, fr);
        // The residual-deviance tolerance for the binomial pair is relative: nullDeviance(model3) * 1e-3.
        assertEquals(nullDeviance(model3), nullDeviance(model4), 1e-4);
        assertEquals(residualDeviance(model4), residualDeviance(model3), nullDeviance(model3) * 1e-3);
        assertEquals(nullDeviance(model1), nullDeviance(model2), 1e-4);
        assertEquals(residualDeviance(model1), residualDeviance(model2), 1e-4);
        //      assertEquals(val1._aic, val2._aic,1e-2);
        // compare result against glmnet
        assertEquals(5336.918, residualDeviance(model1), 1);
        assertEquals(6051.613, nullDeviance(model2), 1);
    // lbfgs
    // (disabled L_BFGS comparison, kept as in the original)
    //      params._solver = Solver.L_BFGS;
    //      params._train = fr._key;
    //      params._lambda = new double[]{.3};
    //      model3 = new GLM(params,glmkey("lbfgs_cat")).trainModel().get();
    //      params._train = frMM._key;
    //      mdoel4 = new GLM(params,glmkey("lbfgs_mm")).trainModel().get();
    //      HashMap<String, Double> coefs3 = model3.coefficients();
    //      HashMap<String, Double> coefs4 = model4.coefficients();
    //      // compare against each other
    //      for(String s:coefs4.keySet()) {
    //        String s1 = s;
    //        if(s.startsWith("Origin"))
    //          s1 = "Origin." + s.substring(6);
    //        if(s.startsWith("Dest"))
    //          s1 = "Dest." + s.substring(4);
    //        if(s.startsWith("UniqueCarrier"))
    //          s1 = "UniqueCarrier." + s.substring(13);
    //        assertEquals("coeff " + s1 + " differs, " + coefs3.get(s1) + " != " + coefs4.get(s), coefs3.get(s1), coefs4.get(s),1e-4);
    //      }
    } finally {
        // The three frames were created before the try block, so they are deleted
        // unconditionally; res and the models may still be null if an earlier step failed.
        fr.delete();
        frMM.delete();
        frG.delete();
        if (res != null)
            res.delete();
        if (model1 != null)
            model1.delete();
        if (model2 != null)
            model2.delete();
        if (model3 != null)
            model3.delete();
        if (model4 != null)
            model4.delete();
        //      if(score != null)score.delete();
        Scope.exit();
    }
}
Also used : BufferedString(water.parser.BufferedString) GLMWeightsFun(hex.glm.GLMModel.GLMWeightsFun) GLMParameters(hex.glm.GLMModel.GLMParameters)

Aggregations

GLMParameters (hex.glm.GLMModel.GLMParameters)50 Test (org.junit.Test)23 Solver (hex.glm.GLMModel.GLMParameters.Solver)16 ModelMetricsBinomialGLM (hex.ModelMetricsBinomialGLM)13 BufferedString (water.parser.BufferedString)10 ModelMetricsRegressionGLM (hex.ModelMetricsRegressionGLM)8 Frame (water.fvec.Frame)7 H2OModelBuilderIllegalArgumentException (water.exceptions.H2OModelBuilderIllegalArgumentException)6 ModelMetricsMultinomialGLM (hex.ModelMetricsBinomialGLM.ModelMetricsMultinomialGLM)4 GLMWeightsFun (hex.glm.GLMModel.GLMWeightsFun)4 HashMap (java.util.HashMap)4 NFSFileVec (water.fvec.NFSFileVec)3 hex (hex)2 DataInfo (hex.DataInfo)2 GLMGradientSolver (hex.glm.GLM.GLMGradientSolver)2 GradientInfo (hex.optimization.OptimizationUtils.GradientInfo)2 GLMTask (hex.glm.GLMTask)1 GradientSolver (hex.optimization.OptimizationUtils.GradientSolver)1 H2OCountedCompleter (water.H2O.H2OCountedCompleter)1