Search in sources :

Example 31 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testNARight.

/**
 * Regression test for PUBDEV-2822.
 *
 * <p>Trains a single-tree GBM (learn rate 1, min_rows 1) on a six-row, two-column frame whose
 * first column is missing in the first and last rows, then checks that scoring the training
 * frame reproduces the response column exactly and that the generated Java scoring code agrees
 * with the in-cluster predictions.
 */
@Test
public void testNARight() {
    // Rows 0 and 5 have an empty (missing) first column; both carry response 10.
    String xy = ",10\n1,0\n2,0\n3,0\n4,10\n,10";
    Key tr = Key.make("train");
    Frame df = null;
    Frame preds = null;
    GBMModel gbm = null;
    try {
        df = ParseDataset.parse(tr, makeByteVec(Key.make("xy"), xy));
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tr;
        parms._response_column = "C2";
        parms._min_rows = 1;
        parms._learn_rate = 1;
        parms._ntrees = 1;
        gbm = new GBM(parms).trainModel().get();
        preds = gbm.score(df);
        Log.info(df);
        Log.info(preds);
        Assert.assertTrue(gbm.testJavaScoring(df, preds, 1e-15));
        // Expected predictions equal the training responses row by row; a delta of 0.0 keeps
        // the comparison exact while reporting the offending row and value on failure.
        double[] expected = { 10, 0, 0, 0, 10, 10 };
        for (int row = 0; row < expected.length; row++) {
            Assert.assertEquals("prediction for row " + row, expected[row], preds.vec(0).at(row), 0.0);
        }
    } finally {
        // Clean up even when an assertion fails, so no keys are left behind.
        if (preds != null) {
            preds.remove();
        }
        if (gbm != null) {
            gbm.remove();
        }
        if (df != null) {
            df.remove();
        }
    }
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Example 32 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method unseenMissing.

/**
 * Trains a multinomial GBM on an all-categorical frame without missing values and scores it on
 * a differently seeded frame in which 80% of the values are missing, checking that the generated
 * Java scoring code matches the in-cluster predictions on both frames.
 */
@Test
public void unseenMissing() {
    GBMModel gbm = null;
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    Frame train = null, test = null, train_preds = null, test_preds = null;
    // Original model key; set once the model has been re-keyed for the second POJO check.
    Key old = null;
    Scope.enter();
    try {
        train = createUnseenMissingFrame(0.0, 1235);
        // different test set
        test = createUnseenMissingFrame(0.8, 4321);
        parms._train = train._key;
        // Train on the outcome
        parms._response_column = "response";
        parms._distribution = DistributionFamily.multinomial;
        parms._max_depth = 20;
        parms._min_rows = 1;
        parms._ntrees = 5;
        parms._seed = 1;
        GBM job = new GBM(parms);
        gbm = job.trainModel().get();
        train_preds = gbm.score(train);
        test_preds = gbm.score(test);
        // Build a POJO, validate same results
        Assert.assertTrue(gbm.testJavaScoring(train, train_preds, 1e-15));
        // Give the model a fresh key before the second POJO check
        // (presumably so the generated class gets a different name -- TODO confirm).
        old = gbm._key;
        gbm._key = Key.make(gbm._key + "ha");
        Assert.assertTrue(gbm.testJavaScoring(test, test_preds, 1e-15));
    } finally {
        // Remove the entry under the original key even if the second assertion failed.
        if (old != null) {
            DKV.remove(old);
        }
        if (gbm != null) {
            gbm.delete();
        }
        if (train != null) {
            train.remove();
        }
        if (test != null) {
            test.remove();
        }
        if (train_preds != null) {
            train_preds.remove();
        }
        if (test_preds != null) {
            test_preds.remove();
        }
        Scope.exit();
    }
}

/**
 * Creates the 100x10 all-categorical frame (3 factor levels, 2-level response) used by
 * {@link #unseenMissing()}.
 *
 * @param missingFraction fraction of values to generate as missing
 * @param seed            seed for the generated values; the column-type seed is fixed at 1234
 * @return the generated frame
 */
private static Frame createUnseenMissingFrame(double missingFraction, long seed) {
    CreateFrame cf = new CreateFrame();
    cf.rows = 100;
    cf.cols = 10;
    cf.integer_range = 1000;
    cf.categorical_fraction = 1.0;
    cf.integer_fraction = 0.0;
    cf.binary_fraction = 0.0;
    cf.time_fraction = 0.0;
    cf.string_fraction = 0.0;
    cf.binary_ones_fraction = 0.0;
    cf.missing_fraction = missingFraction;
    cf.factors = 3;
    cf.response_factors = 2;
    cf.positive_response = false;
    cf.has_response = true;
    cf.seed = seed;
    cf.seed_for_column_types = 1234;
    return cf.execImpl().get();
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Example 33 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testGaussian.

/**
 * Fits a GBM with the gaussian distribution to the Boston housing data, using the last column
 * as the response, and checks that the training MSE and the mean residual deviance both match
 * the same reference value.
 */
@Test
public void testGaussian() {
    Frame boston = null;
    GBMModel model = null;
    try {
        boston = parse_test_file("./smalldata/gbm_test/BostonHousing.csv");

        GBMModel.GBMParameters params = new GBMModel.GBMParameters();
        params._distribution = gaussian;
        params._seed = 0xdecaf;
        params._response_column = boston.lastVecName();
        params._train = boston._key;

        model = new GBM(params).trainModel().get();

        ModelMetricsRegression trainingMetrics = (ModelMetricsRegression) model._output._training_metrics;
        Assert.assertEquals(2.9423857564, trainingMetrics._MSE, 1e-5);
        Assert.assertEquals(2.9423857564, trainingMetrics._mean_residual_deviance, 1e-5);
    } finally {
        if (boston != null) {
            boston.delete();
        }
        if (model != null) {
            model.deleteCrossValidationModels();
            model.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Example 34 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testStochasticGBMHoldout.

/**
 * Splits the ecology data 50/50 into train and validation frames, trains one regression GBM for
 * every combination of row sample rate, column sample rate and per-tree column sample rate
 * (4 x 3 x 3 = 36 models), and logs the validation MSE of each combination from best to worst.
 *
 * <p>The assertions on the worst combination are currently disabled, so the test only verifies
 * that every model trains and produces validation metrics.
 */
@Test
public void testStochasticGBMHoldout() {
    Frame tfr = null;
    Key[] ksplits = new Key[0];
    try {
        tfr = parse_test_file("./smalldata/gbm_test/ecology_model.csv");
        SplitFrame sf = new SplitFrame(tfr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("test.hex") });
        // Invoke the job
        sf.exec().get();
        ksplits = sf._destination_frames;
        float[] sample_rates = new float[] { 0.2f, 0.4f, 0.8f, 1.0f };
        float[] col_sample_rates = new float[] { 0.4f, 0.8f, 1.0f };
        float[] col_sample_rates_per_tree = new float[] { 0.4f, 0.6f, 1.0f };
        // Sorted by validation MSE; combinations with an identical MSE overwrite each other.
        Map<Double, Triple<Float>> hm = new TreeMap<>();
        for (float sample_rate : sample_rates) {
            for (float col_sample_rate : col_sample_rates) {
                for (float col_sample_rate_per_tree : col_sample_rates_per_tree) {
                    // Scoped per iteration so a failed training never re-deletes the previous model.
                    GBMModel gbm = null;
                    Scope.enter();
                    try {
                        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
                        parms._train = ksplits[0];
                        parms._valid = ksplits[1];
                        //regression
                        parms._response_column = "Angaus";
                        parms._seed = 42;
                        parms._min_rows = 2;
                        parms._max_depth = 12;
                        parms._ntrees = 6;
                        parms._col_sample_rate = col_sample_rate;
                        parms._col_sample_rate_per_tree = col_sample_rate_per_tree;
                        parms._sample_rate = sample_rate;
                        gbm = new GBM(parms).trainModel().get();
                        // too slow, but passes (now)
                        //            // Build a POJO, validate same results
                        //            Frame pred = gbm.score(tfr);
                        //            Assert.assertTrue(gbm.testJavaScoring(tfr,pred,1e-15));
                        //            pred.remove();
                        ModelMetricsRegression mm = (ModelMetricsRegression) gbm._output._validation_metrics;
                        hm.put(mm.mse(), new Triple<>(sample_rate, col_sample_rate, col_sample_rate_per_tree));
                    } finally {
                        if (gbm != null) {
                            gbm.delete();
                        }
                        Scope.exit();
                    }
                }
            }
        }
        // Kept for the disabled assertions below: after the loop it holds the worst combination.
        Triple<Float> last = null;
        // iterate over results (min to max MSE) - best to worst
        for (Map.Entry<Double, Triple<Float>> n : hm.entrySet()) {
            Log.info("MSE: " + n.getKey() + ", row sample: " + n.getValue().v1 + ", col sample: " + n.getValue().v2 + ", col sample per tree: " + n.getValue().v3);
            last = n.getValue();
        }
        // worst validation MSE should belong to the most overfit case (1.0, 1.0, 1.0)
        //      Assert.assertTrue(last.v1==sample_rates[sample_rates.length-1]);
        //      Assert.assertTrue(last.v2==col_sample_rates[col_sample_rates.length-1]);
        //      Assert.assertTrue(last.v3==col_sample_rates_per_tree[col_sample_rates_per_tree.length-1]);
    } finally {
        if (tfr != null) {
            tfr.remove();
        }
        for (Key k : ksplits) {
            if (k != null) {
                k.remove();
            }
        }
    }
}
Also used : Frame(water.fvec.Frame) Map(java.util.Map) TreeMap(java.util.TreeMap) Test(org.junit.Test)

Example 35 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testHuber.

/**
 * Fits a GBM with the huber distribution (huber alpha 0.9) to the Boston housing data, using
 * the last column as the response, and checks the training MSE and the mean residual deviance
 * against reference values.
 */
@Test
public void testHuber() {
    Frame boston = null;
    GBMModel model = null;
    try {
        boston = parse_test_file("./smalldata/gbm_test/BostonHousing.csv");

        GBMModel.GBMParameters params = new GBMModel.GBMParameters();
        params._distribution = huber;
        //that's the default
        params._huber_alpha = 0.9;
        params._seed = 0xdecaf;
        params._response_column = boston.lastVecName();
        params._train = boston._key;

        model = new GBM(params).trainModel().get();

        ModelMetricsRegression trainingMetrics = (ModelMetricsRegression) model._output._training_metrics;
        Assert.assertEquals(4.447062185, trainingMetrics._MSE, 1e-5);
        Assert.assertEquals(1.962926332, trainingMetrics._mean_residual_deviance, 1e-4);
    } finally {
        if (boston != null) {
            boston.delete();
        }
        if (model != null) {
            model.deleteCrossValidationModels();
            model.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Aggregations

Frame (water.fvec.Frame)782 Test (org.junit.Test)435 Vec (water.fvec.Vec)215 ValFrame (water.rapids.vals.ValFrame)132 NFSFileVec (water.fvec.NFSFileVec)66 Val (water.rapids.Val)65 SplitFrame (hex.SplitFrame)59 Key (water.Key)56 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)54 Chunk (water.fvec.Chunk)50 NewChunk (water.fvec.NewChunk)37 MRTask (water.MRTask)33 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)31 Ignore (org.junit.Ignore)28 Random (java.util.Random)26 File (java.io.File)25 BufferedString (water.parser.BufferedString)21 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)19 HashMap (java.util.HashMap)17 hex (hex)16