Search in sources :

Example 61 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testGBMRegressionGaussian.

/**
 * Trains a single depth-1 gaussian GBM tree on the Mfgdata test file, using
 * column 2 as the only predictor of column 1, and checks that the MSE computed
 * from the scored predictions agrees with the values recorded in the model's
 * training score history.
 */
@Test
public void testGBMRegressionGaussian() {
    final double expectedMse = 79152.12337641386;
    final double tolerance = 0.1;
    Frame train = null;
    Frame preds = null;
    GBMModel model = null;
    try {
        train = parse_test_file("./smalldata/gbm_test/Mfgdata_gaussian_GBM_testing.csv");
        final int ncols = train.numCols();

        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = train._key;
        parms._distribution = gaussian;
        // Column layout: 0 = row id, 1 = response, 2 = the single predictor
        parms._response_column = train._names[1];
        parms._ntrees = 1;
        parms._max_depth = 1;
        parms._min_rows = 1;
        parms._nbins = 20;

        // Ignore the row-id column plus every column after the predictor
        String[] ignored = new String[ncols - 2];
        ignored[0] = train._names[0];
        System.arraycopy(train._names, 3, ignored, 1, ncols - 3);
        parms._ignored_columns = ignored;

        parms._learn_rate = 1.0f;
        parms._score_each_iteration = true;

        GBM builder = new GBM(parms);
        model = builder.trainModel().get();
        // HEX-1817: the job must report itself as stopped once the model is built
        Assert.assertTrue(builder.isStopped());

        // Score the training frame and compute the MSE by hand.
        // The raw response column is read from the frame rather than from the job,
        // because the job's copy has been rebalanced.
        preds = model.score(train);
        double squaredError = new MathUtils.SquareError().doAll(train.vecs()[1], preds.vecs()[0])._sum;
        double mse = squaredError / preds.numRows();

        assertEquals(expectedMse, mse, tolerance);
        assertEquals(expectedMse, model._output._scored_train[1]._mse, tolerance);
        assertEquals(expectedMse, model._output._scored_train[1]._mean_residual_deviance, tolerance);
    } finally {
        if (train != null) {
            train.remove();
        }
        if (preds != null) {
            preds.remove();
        }
        if (model != null) {
            model.remove();
        }
    }
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Example 62 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testKDDTrees.

/**
 * Builds the same GBM model repeatedly on the KDD cup98 data and checks that
 * every run produces the same validation score history as the first run.
 *
 * Test uses big data and is too slow for a pre-push.
 */
@Test
@Ignore
public void testKDDTrees() {
    Frame tfr = null, vfr = null;
    GBMModel gbm1 = null;
    String[] cols = new String[] { "DOB", "LASTGIFT", "TARGET_D" };
    try {
        // Load data, hack frames
        Frame inF1 = parse_test_file("bigdata/laptop/usecases/cup98LRN_z.csv");
        Frame inF2 = parse_test_file("bigdata/laptop/usecases/cup98VAL_z.csv");
        // Just the columns to train on
        tfr = inF1.subframe(cols);
        vfr = inF2.subframe(cols);
        // Toss all the rest away
        inF1.remove(cols).remove();
        inF2.remove(cols).remove();
        // Convert 'DOB' to categorical
        tfr.replace(0, tfr.vec("DOB").toCategoricalVec());
        vfr.replace(0, vfr.vec("DOB").toCategoricalVec());
        DKV.put(tfr);
        DKV.put(vfr);
        // Same parms for all
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._valid = vfr._key;
        parms._response_column = "TARGET_D";
        parms._ntrees = 3;
        parms._distribution = gaussian;
        // Build a first model; all remaining models should be equal
        GBM job1 = new GBM(parms);
        gbm1 = job1.trainModel().get();
        // Validation score history of the reference model
        ScoreKeeper[] firstScored = gbm1._output._scored_valid;
        // Build 10 more models, checking for equality
        for (int i = 0; i < 10; i++) {
            GBMModel gbm2 = null;
            try {
                GBM job2 = new GBM(parms);
                gbm2 = job2.trainModel().get();
                ScoreKeeper[] secondScored = gbm2._output._scored_valid;
                // Compare with equals(), matching what assertArrayEquals does below.
                // A reference comparison (!=) is always true for objects coming from
                // two different models, so the diagnostic dump would print every time.
                if (!java.util.Arrays.equals(firstScored, secondScored)) {
                    // Report on unequal: dump both models' trees before failing
                    System.out.println("=== =============== ===");
                    System.out.println("=== ORIGINAL  MODEL ===");
                    for (int t = 0; t < parms._ntrees; t++) {
                        System.out.println(gbm1._output.toStringTree(t, 0));
                    }
                    System.out.println("=== DIFFERENT MODEL ===");
                    for (int t = 0; t < parms._ntrees; t++) {
                        System.out.println(gbm2._output.toStringTree(t, 0));
                    }
                    System.out.println("=== =============== ===");
                    Assert.assertArrayEquals("GBM should have the exact same MSEs for identical parameters", firstScored, secondScored);
                }
            } finally {
                // Delete the per-iteration model even when the assertion above fails
                if (gbm2 != null) {
                    gbm2.delete();
                }
            }
        }
    } finally {
        // Delete the reference model on both success and failure paths
        if (gbm1 != null) {
            gbm1.delete();
        }
        if (tfr != null) {
            tfr.remove();
        }
        if (vfr != null) {
            vfr.remove();
        }
    }
}
Also used : Frame(water.fvec.Frame) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 63 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testModelAdaptMultinomial.

/**
 * Adapt a trained model to a test dataset with different categoricals.
 *
 * Trains a one-tree multinomial GBM on mixcat_train.csv, scores mixcat_test.csv,
 * checks the predicted class indices, and validates POJO scoring against the
 * in-H2O predictions.
 */
@Test
public void testModelAdaptMultinomial() {
    GBMModel gbm = null;
    Frame res = null;
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    Scope.enter();
    try {
        Frame v;
        parms._train = (parse_test_file("smalldata/junit/mixcat_train.csv"))._key;
        parms._valid = (v = parse_test_file("smalldata/junit/mixcat_test.csv"))._key;
        // Train on the outcome
        parms._response_column = "Response";
        // Build a CART tree - 1 tree, full learn rate, down to 1 row
        parms._ntrees = 1;
        parms._learn_rate = 1.0f;
        parms._min_rows = 1;
        parms._distribution = DistributionFamily.multinomial;
        gbm = new GBM(parms).trainModel().get();
        res = gbm.score(v);
        int[] ps = new int[(int) v.numRows()];
        Vec.Reader vr = res.vecs()[0].new Reader();
        for (int i = 0; i < ps.length; i++) {
            ps[i] = (int) vr.at8(i);
        }
        // Expected predictions are X,X,Y,Y,X,Y,Z,X,Y
        // Never predicts W, the extra class in the test set.
        // Badly predicts Z because 1 tree does not pick up that feature#2 can also
        // be used to predict Z, and instead relies on factor C which does not appear
        // in the test set.
        // JUnit convention: expected values first, actual second.
        Assert.assertArrayEquals("", new int[] { 1, 1, 2, 2, 1, 2, 3, 1, 2 }, ps);
        // Metrics lookup kept from the original test; its result is not asserted on.
        hex.ModelMetricsMultinomial.getFromDKV(gbm, parms.valid());
        // Build a POJO, validate same results
        Assert.assertTrue(gbm.testJavaScoring(v, res, 1e-15));
    } finally {
        // Every handle is null-guarded so a failure during setup is reported as
        // itself rather than being masked by a NullPointerException in cleanup.
        if (res != null) {
            res.remove();
        }
        if (parms._train != null) {
            parms._train.remove();
        }
        if (parms._valid != null) {
            parms._valid.remove();
        }
        if (gbm != null) {
            gbm.delete();
        }
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) FVecTest.makeByteVec(water.fvec.FVecTest.makeByteVec) hex(hex) Test(org.junit.Test)

Example 64 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testNfoldsInvalidValues.

/**
 * Checks that GBM accepts nfolds == 0 and rejects nfolds == 1 and
 * nfolds == -99 with an H2OModelBuilderIllegalArgumentException.
 */
@Test
public void testNfoldsInvalidValues() {
    final int[] invalidNfolds = { 1, -99 };
    Frame train = null;
    GBMModel baseline = null;
    // Filled only if a build with an invalid nfolds unexpectedly succeeds
    GBMModel[] unexpected = new GBMModel[invalidNfolds.length];
    Scope.enter();
    try {
        train = parse_test_file("smalldata/junit/weights.csv");
        DKV.put(train);

        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = train._key;
        parms._response_column = "response";
        parms._min_rows = 1;
        parms._seed = 12345;
        parms._max_depth = 2;
        parms._ntrees = 3;
        parms._learn_rate = 1e-3f;

        // nfolds == 0 must build normally
        parms._nfolds = 0;
        baseline = new GBM(parms).trainModel().get();

        for (int i = 0; i < invalidNfolds.length; i++) {
            parms._nfolds = invalidNfolds[i];
            try {
                Log.info("Trying nfolds==" + invalidNfolds[i] + ".");
                unexpected[i] = new GBM(parms).trainModel().get();
                Assert.fail("Should toss H2OModelBuilderIllegalArgumentException instead of reaching here");
            } catch (H2OModelBuilderIllegalArgumentException expected) {
                // Rejection is the outcome under test; nothing to do
            }
        }
    } finally {
        if (train != null) {
            train.remove();
        }
        if (baseline != null) {
            baseline.delete();
        }
        for (GBMModel leaked : unexpected) {
            if (leaked != null) {
                leaked.delete();
            }
        }
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) H2OModelBuilderIllegalArgumentException(water.exceptions.H2OModelBuilderIllegalArgumentException) Test(org.junit.Test)

Example 65 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testQuantileRegression.

/**
 * Trains a quantile-distribution GBM (alpha 0.4) on the ecology data with row
 * and column sampling, validates POJO scoring on the evaluation file, and
 * checks the training mean residual deviance against a fixed reference value.
 */
@Test
public void testQuantileRegression() {
    GBMModel gbm = null;
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    Frame pred = null, res = null;
    Scope.enter();
    try {
        Frame train = parse_test_file("smalldata/gbm_test/ecology_model.csv");
        // Remove unique ID
        train.remove("Site").remove();
        // Remove categorical
        train.remove("Method").remove();
        // Update frame after hacking it
        DKV.put(train);
        parms._train = train._key;
        // Train on the outcome
        parms._response_column = "DSDist";
        parms._distribution = DistributionFamily.quantile;
        parms._quantile_alpha = 0.4;
        parms._sample_rate = 0.6f;
        parms._col_sample_rate = 0.8f;
        parms._col_sample_rate_per_tree = 0.8f;
        parms._seed = 1234;
        GBM job = new GBM(parms);
        gbm = job.trainModel().get();
        pred = parse_test_file("smalldata/gbm_test/ecology_eval.csv");
        res = gbm.score(pred);
        // Build a POJO, validate same results
        Assert.assertTrue(gbm.testJavaScoring(pred, res, 1e-15));
        // assertEquals with a delta reports the actual value when the check fails
        Assert.assertEquals(10.69611, ((ModelMetricsRegression) gbm._output._training_metrics)._mean_residual_deviance, 1e-4);
    } finally {
        // _train stays null if parsing failed; guard so the original failure is
        // not masked by a NullPointerException and the remaining cleanup still runs.
        if (parms._train != null) {
            parms._train.remove();
        }
        if (gbm != null) {
            gbm.delete();
        }
        if (pred != null) {
            pred.remove();
        }
        if (res != null) {
            res.remove();
        }
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Aggregations

Frame (water.fvec.Frame)782 Test (org.junit.Test)435 Vec (water.fvec.Vec)215 ValFrame (water.rapids.vals.ValFrame)132 NFSFileVec (water.fvec.NFSFileVec)66 Val (water.rapids.Val)65 SplitFrame (hex.SplitFrame)59 Key (water.Key)56 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)54 Chunk (water.fvec.Chunk)50 NewChunk (water.fvec.NewChunk)37 MRTask (water.MRTask)33 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)31 Ignore (org.junit.Ignore)28 Random (java.util.Random)26 File (java.io.File)25 BufferedString (water.parser.BufferedString)21 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)19 HashMap (java.util.HashMap)17 hex (hex)16