Search in sources :

Example 11 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testGradientTask.

/**
 * Verifies that the family-specific gradient tasks (binomial and gaussian) compute the same
 * gradient as the generic gradient task when given the same random coefficient vector.
 * Gradients are compared element by element with an absolute tolerance of 1e-4.
 */
@Test
public void testGradientTask() {
    Key parsed = Key.make("cars_parsed");
    Frame fr = null;
    DataInfo dinfo = null;
    try {
        fr = parse_test_file(parsed, "smalldata/junit/mixcat_train.csv");

        // ---- binomial: specialized task vs. generic task ----
        GLMParameters params = new GLMParameters(Family.binomial, Family.binomial.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
        params._train = parsed;
        params._lambda = new double[] { 0 };
        params._use_all_factor_levels = true;
        // Re-append "Useless" to the frame (presumably so it ends up as the last column and
        // serves as the single response requested from DataInfo below -- TODO confirm).
        fr.add("Useless", fr.remove("Useless"));
        boolean useAllLevels = params._use_all_factor_levels || params._lambda_search;
        DataInfo.TransformType predictorTransform = params._standardize ? DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE;
        dinfo = new DataInfo(fr, null, 1, useAllLevels, predictorTransform, DataInfo.TransformType.NONE, true, false, false, false, false, false);
        DKV.put(dinfo._key, dinfo);
        // Random coefficients in (-1, 1]; fixed seed keeps the test reproducible.
        double[] beta = MemoryManager.malloc8d(dinfo.fullN() + 1);
        Random rng = new Random(987654321);
        for (int k = 0; k < beta.length; k++) {
            beta[k] = 1 - 2 * rng.nextDouble();
        }
        GLMGradientTask specialized = new GLMBinomialGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
        GLMGradientTask generic = new GLMGenericGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
        for (int k = 0; k < beta.length; k++) {
            assertEquals("gradients differ", specialized._gradient[k], generic._gradient[k], 1e-4);
        }

        // ---- gaussian: specialized task vs. generic task ----
        params = new GLMParameters(Family.gaussian, Family.gaussian.defaultLink, new double[] { 0 }, new double[] { 0 }, 0, 0);
        params._use_all_factor_levels = false;
        dinfo.remove();
        useAllLevels = params._use_all_factor_levels || params._lambda_search;
        predictorTransform = params._standardize ? DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE;
        dinfo = new DataInfo(fr, null, 1, useAllLevels, predictorTransform, DataInfo.TransformType.NONE, true, false, false, false, false, false);
        DKV.put(dinfo._key, dinfo);
        beta = MemoryManager.malloc8d(dinfo.fullN() + 1);
        rng = new Random(1987654321);
        for (int k = 0; k < beta.length; k++) {
            beta[k] = 1 - 2 * rng.nextDouble();
        }
        specialized = new GLMGaussianGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
        generic = new GLMGenericGradientTask(null, dinfo, params, params._lambda[0], beta).doAll(dinfo._adaptedFrame);
        for (int k = 0; k < beta.length; k++) {
            assertEquals("gradients differ: " + Arrays.toString(specialized._gradient) + " != " + Arrays.toString(generic._gradient), specialized._gradient[k], generic._gradient[k], 1e-4);
        }
        dinfo.remove();
    } finally {
        if (fr != null) {
            fr.delete();
        }
        if (dinfo != null) {
            dinfo.remove();
        }
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters)

Example 12 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testGaussianRegression.

//------------------- simple tests on synthetic data------------------------------------
/**
 * Fits a gaussian GLM (lambda = 0) to a tiny synthetic data set where y = 0.1 * x exactly,
 * checks the recovered intercept and slope, and then runs the shared scoring check.
 *
 * @throws InterruptedException declared for callers; not thrown explicitly in this method
 * @throws ExecutionException declared for callers; not thrown explicitly in this method
 */
@Test
public void testGaussianRegression() throws InterruptedException, ExecutionException {
    Key raw = Key.make("gaussian_test_data_raw");
    Key parsed = Key.make("gaussian_test_data_parsed");
    GLMModel model = null;
    Frame fr = null;
    try {
        // Synthetic data with y = 0.1 * x, so the expected fit is Intercept = 0.0 and x = 0.1.
        FVecTest.makeByteVec(raw, "x,y\n0,0\n1,0.1\n2,0.2\n3,0.3\n4,0.4\n5,0.5\n6,0.6\n7,0.7\n8,0.8\n9,0.9");
        fr = ParseDataset.parse(parsed, raw);
        GLMParameters params = new GLMParameters(Family.gaussian);
        params._train = fr._key;
        // The second column ("y") is the response.
        params._response_column = fr._names[1];
        // No regularization.
        params._lambda = new double[] { 0 };
        model = new GLM(params).trainModel().get();
        HashMap<String, Double> coefs = model.coefficients();
        assertEquals(0.0, coefs.get("Intercept"), 1e-4);
        assertEquals(0.1, coefs.get("x"), 1e-4);
        testScoring(model, fr);
    } finally {
        // Clean up whatever was created, even if training or an assertion failed.
        if (fr != null)
            fr.remove();
        if (model != null)
            model.remove();
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters) BufferedString(water.parser.BufferedString)

Example 13 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testSparseGramComputation.

//  // test categorical autoexpansions, run on airlines which has several categorical columns,
//  // once on explicitly expanded data, once on h2o autoexpanded and compare the results
//  @Test public void testSparseCategoricals() {
//    GLMModel model1 = null, model2 = null, model3 = null, model4 = null;
//
//    Frame frMM = parse_test_file("smalldata/glm_tets/train-2.csv");
//
////    Vec xy = frG.remove("xy");
//    frMM.remove("").remove();
//    frMM.add("IsDepDelayed", frMM.remove("IsDepDelayed"));
//    DKV.put(frMM._key,frMM);
//    Frame fr = parse_test_file("smalldata/airlines/AirlinesTrain.csv.zip"), res = null;
//    //  Distance + Origin + Dest + UniqueCarrier
//    String [] ignoredCols = new String[]{"fYear", "fMonth", "fDayofMonth", "fDayOfWeek", "DepTime","ArrTime","IsDepDelayed_REC"};
//    try{
//      Scope.enter();
//      GLMParameters params = new GLMParameters(Family.gaussian);
//      params._response_column = "IsDepDelayed";
//      params._ignored_columns = ignoredCols;
//      params._train = fr._key;
//      params._l2pen = new double[]{1e-5};
//      params._standardize = false;
//      model1 = new GLM(params,glmkey("airlines_cat_nostd")).trainModel().get();
//      Frame score1 = model1.score(fr);
//      ModelMetricsRegressionGLM mm = (ModelMetricsRegressionGLM) ModelMetrics.getFromDKV(model1, fr);
//      Assert.assertEquals(model1.validation().residual_deviance, mm._resDev, 1e-4);
//      System.out.println("NDOF = " + model1.validation().nullDOF() + ", numRows = " + score1.numRows());
//      Assert.assertEquals(model1.validation().residual_deviance, mm._MSE * score1.numRows(), 1e-4);
//      mm.remove();
//      res = model1.score(fr);
//      // Build a POJO, validate same results
//      Assert.assertTrue(model1.testJavaScoring(fr, res, 1e-15));
//
//      params._train = frMM._key;
//      params._ignored_columns = new String[]{"X"};
//      model2 = new GLM(params,glmkey("airlines_mm")).trainModel().get();
//      params._standardize = true;
//      params._train = frMM._key;
//      params._use_all_factor_levels = true;
//      // test the gram
//      DataInfo dinfo = new DataInfo(Key.make(),frMM, null, 1, true, DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE, true);
//      GLMIterationTask glmt = new GLMIterationTask(null,dinfo,1e-5,params,false,null,0,null, null).doAll(dinfo._adaptedFrame);
//      for(int i = 0; i < glmt._xy.length; ++i) {
//        for(int j = 0; j <= i; ++j ) {
//          assertEquals(frG.vec(j).at(i), glmt._gram.get(i, j), 1e-5);
//        }
//        assertEquals(xy.at(i), glmt._xy[i], 1e-5);
//      }
//      frG.delete();
//      xy.remove();
//      params._standardize = true;
//      params._family = Family.binomial;
//      params._link = Link.logit;
//      model3 = new GLM(params,glmkey("airlines_mm")).trainModel().get();
//      params._train = fr._key;
//      params._ignored_columns = ignoredCols;
//      model4 = new GLM(params,glmkey("airlines_mm")).trainModel().get();
//      assertEquals(model3.validation().null_deviance,model4.validation().nullDeviance(),1e-4);
//      assertEquals(model4.validation().residual_deviance, model3.validation().residualDeviance(), model3.validation().null_deviance * 1e-3);
//      HashMap<String, Double> coefs1 = model1.coefficients();
//      HashMap<String, Double> coefs2 = model2.coefficients();
//      GLMValidation val1 = model1.validation();
//      GLMValidation val2 = model2.validation();
//      // compare against each other
//      for(String s:coefs2.keySet()) {
//        String s1 = s;
//        if(s.startsWith("Origin"))
//          s1 = "Origin." + s.substring(6);
//        if(s.startsWith("Dest"))
//          s1 = "Dest." + s.substring(4);
//        if(s.startsWith("UniqueCarrier"))
//          s1 = "UniqueCarrier." + s.substring(13);
//        assertEquals("coeff " + s1 + " differs, " + coefs1.get(s1) + " != " + coefs2.get(s), coefs1.get(s1), coefs2.get(s),1e-4);
//        DKV.put(frMM._key,frMM); // update the frame in the KV after removing the vec!
//      }
//      assertEquals(val1.nullDeviance(), val2.nullDeviance(),1e-4);
//      assertEquals(val1.residualDeviance(), val2.residualDeviance(),1e-4);
//      assertEquals(val1._aic, val2._aic,1e-2);
//      // compare result against glmnet
//      assertEquals(5336.918,val1.residualDeviance(),1);
//      assertEquals(6051.613,val1.nullDeviance(),1);
//
//
//      // lbfgs
////      params._solver = Solver.L_BFGS;
////      params._train = fr._key;
////      params._lambda = new double[]{.3};
////      model3 = new GLM(params,glmkey("lbfgs_cat")).trainModel().get();
////      params._train = frMM._key;
////      model4 = new GLM(params,glmkey("lbfgs_mm")).trainModel().get();
////      HashMap<String, Double> coefs3 = model3.coefficients();
////      HashMap<String, Double> coefs4 = model4.coefficients();
////      // compare against each other
////      for(String s:coefs4.keySet()) {
////        String s1 = s;
////        if(s.startsWith("Origin"))
////          s1 = "Origin." + s.substring(6);
////        if(s.startsWith("Dest"))
////          s1 = "Dest." + s.substring(4);
////        if(s.startsWith("UniqueCarrier"))
////          s1 = "UniqueCarrier." + s.substring(13);
////        assertEquals("coeff " + s1 + " differs, " + coefs3.get(s1) + " != " + coefs4.get(s), coefs3.get(s1), coefs4.get(s),1e-4);
////      }
//
//    } finally {
//      fr.delete();
//      frMM.delete();
//      if(res != null)res.delete();
//      if(model1 != null)model1.delete();
//      if(model2 != null)model2.delete();
//      if(model3 != null)model3.delete();
//      if(model4 != null)model4.delete();
////      if(score != null)score.delete();
//      Scope.exit();
//    }
//  }
/**
 * Test we get correct gram on dataset which contains categoricals and sparse and dense numbers.
 *
 * <p>Builds a 1000-row frame from two random categorical columns, two dense random numeric
 * columns and several mostly-zero numeric columns, then checks that the sparse and the dense
 * variants of {@code GLMIterationTask} produce the same gram matrix and xy vector (tolerance
 * 1e-8). The comparison is done twice: first with a {@code null} beta, then with a beta
 * obtained by solving the dense gram with {@code GLM.GramSolver}.
 *
 * <p>The frame and the {@code DataInfo} are released in a {@code finally} block so that a
 * failing assertion does not leave them behind.
 */
@Test
public void testSparseGramComputation() {
    Random rnd = new Random(123456789L);
    double[] d0 = MemoryManager.malloc8d(1000);
    double[] d1 = MemoryManager.malloc8d(1000);
    double[] d2 = MemoryManager.malloc8d(1000);
    double[] d3 = MemoryManager.malloc8d(1000);
    double[] d4 = MemoryManager.malloc8d(1000);
    double[] d5 = MemoryManager.malloc8d(1000);
    double[] d6 = MemoryManager.malloc8d(1000);
    double[] d7 = MemoryManager.malloc8d(1000);
    double[] d8 = MemoryManager.malloc8d(1000);
    double[] d9 = MemoryManager.malloc8d(1000);
    long[] c1 = MemoryManager.malloc8(1000);
    long[] c2 = MemoryManager.malloc8(1000);
    String[] dom = new String[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };
    // Dense columns: every row gets a random categorical level / random double.
    for (int i = 0; i < d1.length; ++i) {
        c1[i] = rnd.nextInt(dom.length);
        c2[i] = rnd.nextInt(dom.length);
        d0[i] = rnd.nextDouble();
        d1[i] = rnd.nextDouble();
    }
    // Sparse columns: 30 randomly chosen positions per column are written (positions may repeat).
    for (int i = 0; i < 30; ++i) {
        d2[rnd.nextInt(d2.length)] = rnd.nextDouble();
        d3[rnd.nextInt(d2.length)] = rnd.nextDouble();
        d4[rnd.nextInt(d2.length)] = rnd.nextDouble();
        d5[rnd.nextInt(d2.length)] = rnd.nextDouble();
        d6[rnd.nextInt(d2.length)] = rnd.nextDouble();
        d7[rnd.nextInt(d2.length)] = rnd.nextDouble();
        d8[rnd.nextInt(d2.length)] = rnd.nextDouble();
        d9[rnd.nextInt(d2.length)] = 1;
    }
    Vec.VectorGroup vg_1 = Vec.VectorGroup.VG_LEN1;
    Vec v01 = Vec.makeVec(c1, dom, vg_1.addVec());
    Vec v02 = Vec.makeVec(c2, dom, vg_1.addVec());
    Vec v03 = Vec.makeVec(d0, vg_1.addVec());
    Vec v04 = Vec.makeVec(d1, vg_1.addVec());
    Vec v05 = Vec.makeVec(d2, vg_1.addVec());
    Vec v06 = Vec.makeVec(d3, vg_1.addVec());
    Vec v07 = Vec.makeVec(d4, vg_1.addVec());
    Vec v08 = Vec.makeVec(d5, vg_1.addVec());
    Vec v09 = Vec.makeVec(d6, vg_1.addVec());
    Vec v10 = Vec.makeVec(d7, vg_1.addVec());
    Vec v11 = Vec.makeVec(d8, vg_1.addVec());
    Vec v12 = Vec.makeVec(d9, vg_1.addVec());
    // NOTE(review): v05 is listed twice below -- confirm whether the duplicated column is intentional.
    Frame f = new Frame(Key.<Frame>make("TestData"), null, new Vec[] { v01, v02, v03, v04, v05, v05, v06, v07, v08, v09, v10, v11, v12 });
    DataInfo dinfo = null;
    try {
        DKV.put(f);
        dinfo = new DataInfo(f, null, 1, true, DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE, true, false, false, false, false, false);
        GLMParameters params = new GLMParameters(Family.gaussian);
        // First pass: no beta supplied; sparse and dense gram / xy must match.
        final GLMIterationTask glmtSparse = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), null).setSparse(true).doAll(dinfo._adaptedFrame);
        final GLMIterationTask glmtDense = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), null).setSparse(false).doAll(dinfo._adaptedFrame);
        for (int i = 0; i < glmtDense._xy.length; ++i) {
            // Only the lower triangle (j <= i) is compared.
            for (int j = 0; j <= i; ++j) {
                assertEquals(glmtDense._gram.get(i, j), glmtSparse._gram.get(i, j), 1e-8);
            }
            assertEquals(glmtDense._xy[i], glmtSparse._xy[i], 1e-8);
        }
        final double[] beta = MemoryManager.malloc8d(dinfo.fullN() + 1);
        // now do the same but weighted, use LSM solution as beta to generate meaningful weights
        H2O.submitTask(new H2OCountedCompleter() {

            @Override
            public void compute2() {
                new GLM.GramSolver(glmtDense._gram, glmtDense._xy, true, 1e-5, 0, null, null, null, null).solve(null, beta);
                tryComplete();
            }
        }).join();
        // Second pass: same comparison with the solved beta passed to both tasks.
        final GLMIterationTask glmtSparse2 = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), beta).setSparse(true).doAll(dinfo._adaptedFrame);
        final GLMIterationTask glmtDense2 = new GLMIterationTask(null, dinfo, new GLMWeightsFun(params), beta).setSparse(false).doAll(dinfo._adaptedFrame);
        for (int i = 0; i < glmtDense2._xy.length; ++i) {
            for (int j = 0; j <= i; ++j) {
                assertEquals(glmtDense2._gram.get(i, j), glmtSparse2._gram.get(i, j), 1e-8);
            }
            assertEquals(glmtDense2._xy[i], glmtSparse2._xy[i], 1e-8);
        }
    } finally {
        // Same cleanup order as before (DataInfo first, then the frame), now also run on failure.
        if (dinfo != null)
            dinfo.remove();
        f.delete();
    }
}
Also used : BufferedString(water.parser.BufferedString) GLMWeightsFun(hex.glm.GLMModel.GLMWeightsFun) GLMParameters(hex.glm.GLMModel.GLMParameters) H2OCountedCompleter(water.H2O.H2OCountedCompleter)

Example 14 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method testCoordinateDescent_airlines.

/**
 * Trains a standardized binomial GLM on the airlines training data using the naive
 * coordinate-descent solver, asserts that the builder reports itself as stopped once the
 * model is available, and prints the training metrics.
 */
@Test
public void testCoordinateDescent_airlines() {
    GLMModel trained = null;
    Key frameKey = Key.make("airlines_parsed");
    Key<GLMModel> destKey = Key.make("airlines_model");
    Frame airlines = parse_test_file(frameKey, "smalldata/airlines/AirlinesTrain.csv.zip");
    try {
        GLMParameters glmParams = new GLMParameters();
        // data: predict IsDepDelayed, leaving out the IsDepDelayed_REC column
        glmParams._train = airlines._key;
        glmParams._response_column = "IsDepDelayed";
        glmParams._ignored_columns = new String[] { "IsDepDelayed_REC" };
        // model: binomial family, standardized predictors, naive coordinate descent
        glmParams._family = Family.binomial;
        glmParams._standardize = true;
        glmParams._solver = Solver.COORDINATE_DESCENT_NAIVE;

        GLM builder = new GLM(glmParams, destKey);
        trained = builder.trainModel().get();
        assertTrue(builder.isStopped());
        System.out.println(trained._output._training_metrics);
    } finally {
        airlines.delete();
        if (trained != null) {
            trained.delete();
        }
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters)

Example 15 with GLMParameters

use of hex.glm.GLMModel.GLMParameters in project h2o-3 by h2oai.

the class GLMTest method test_COD_Airlines_LambdaSearch.

/**
 * Trains a binomial GLM on the airlines training data with the naive coordinate-descent
 * solver and lambda search (5 lambdas, alpha = 1, no standardization), using the same frame
 * for training and validation, then reads the coefficients of the last submodel.
 *
 * <p>The objective-value assertion is currently disabled (see the commented-out lines), so
 * the test only checks that training completes and the last submodel can be read.
 */
@Test
public void test_COD_Airlines_LambdaSearch() {
    GLMModel model1 = null;
    //  Distance + Origin + Dest + UniqueCarrier
    Frame fr = parse_test_file(Key.make("Airlines"), "smalldata/airlines/AirlinesTrain.csv.zip");
    String[] ignoredCols = new String[] { "IsDepDelayed_REC" };
    try {
        Scope.enter();
        GLMParameters params = new GLMParameters(Family.binomial);
        params._response_column = "IsDepDelayed";
        params._ignored_columns = ignoredCols;
        params._train = fr._key;
        params._valid = fr._key;
        // No explicit lambdas: lambda search is enabled below.
        params._lambda = null;
        params._alpha = new double[] { 1 };
        params._standardize = false;
        params._solver = Solver.COORDINATE_DESCENT_NAIVE;
        params._lambda_search = true;
        params._nlambdas = 5;
        GLM glm = new GLM(params);
        model1 = glm.trainModel().get();
        // Inspect the last submodel in the output.
        GLMModel.Submodel sm = model1._output._submodels[model1._output._submodels.length - 1];
        double[] beta = sm.beta;
        System.out.println("lambda " + sm.lambda_value);
        // Penalty terms; only consumed by the disabled objective check below.
        double l1pen = ArrayUtils.l1norm(beta, true);
        double l2pen = ArrayUtils.l2norm2(beta, true);
    //      double objective = job.likelihood()/model1._nobs + // gives likelihood of the last lambda
    //              params._l2pen[params._l2pen.length-1]*params._alpha[0]*l1pen + params._l2pen[params._l2pen.length-1]*(1-params._alpha[0])*l2pen/2  ;
    //      assertEquals(0.65689, objective,1e-4);
    } finally {
        fr.delete();
        if (model1 != null)
            model1.delete();
        // Close the scope opened by Scope.enter() above so it is not left open after the test.
        Scope.exit();
    }
}
Also used : GLMParameters(hex.glm.GLMModel.GLMParameters) BufferedString(water.parser.BufferedString)

Aggregations

GLMParameters (hex.glm.GLMModel.GLMParameters)50 Test (org.junit.Test)23 Solver (hex.glm.GLMModel.GLMParameters.Solver)16 ModelMetricsBinomialGLM (hex.ModelMetricsBinomialGLM)13 BufferedString (water.parser.BufferedString)10 ModelMetricsRegressionGLM (hex.ModelMetricsRegressionGLM)8 Frame (water.fvec.Frame)7 H2OModelBuilderIllegalArgumentException (water.exceptions.H2OModelBuilderIllegalArgumentException)6 ModelMetricsMultinomialGLM (hex.ModelMetricsBinomialGLM.ModelMetricsMultinomialGLM)4 GLMWeightsFun (hex.glm.GLMModel.GLMWeightsFun)4 HashMap (java.util.HashMap)4 NFSFileVec (water.fvec.NFSFileVec)3 hex (hex)2 DataInfo (hex.DataInfo)2 GLMGradientSolver (hex.glm.GLM.GLMGradientSolver)2 GradientInfo (hex.optimization.OptimizationUtils.GradientInfo)2 GLMTask (hex.glm.GLMTask)1 GradientSolver (hex.optimization.OptimizationUtils.GradientSolver)1 H2OCountedCompleter (water.H2O.H2OCountedCompleter)1