Search in sources :

Example 76 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testHuberDeltaTiny.

/**
 * Trains a GBM with the huber distribution and a very small huber alpha on the
 * Boston housing data, then checks the training MSE and the mean residual
 * deviance against hard-coded reference values.
 */
@Test
public void testHuberDeltaTiny() {
    Frame trainFrame = null;
    GBMModel model = null;
    try {
        trainFrame = parse_test_file("./smalldata/gbm_test/BostonHousing.csv");

        GBMModel.GBMParameters params = new GBMModel.GBMParameters();
        params._train = trainFrame._key;
        params._response_column = trainFrame.lastVecName();
        params._seed = 0xdecaf;
        params._distribution = huber;
        // With such a small alpha everything is an outlier and we should get laplace loss
        params._huber_alpha = 1e-2;

        model = new GBM(params).trainModel().get();
        ModelMetricsRegression metrics = (ModelMetricsRegression) model._output._training_metrics;
        Assert.assertEquals(8.05716257, metrics._MSE, 0.3);

        // Huber loss can be derived from MAE since there are no observation weights
        // delta is hardcoded from the training output
        final double delta = 0.0047234;
        // MAE reference value (see the laplace case)
        final double mae = 1.42298;
        final double expectedDeviance = (2 * mae - delta) * delta;
        Assert.assertEquals(expectedDeviance, metrics._mean_residual_deviance, 2e-4);
    } finally {
        // Clean up in the same order as before: frame first, then CV models, then the model
        if (trainFrame != null) {
            trainFrame.delete();
        }
        if (model != null) {
            model.deleteCrossValidationModels();
            model.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Example 77 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class DRFTest method testReproducibilityAirline.

// PUBDEV-557 Test dependency on # nodes (for small number of bins, but fixed number of chunks)
/**
 * Rebalances the airlines data set to a fixed chunk count, drops a set of
 * columns, trains {@code N} DRF models with identical parameters and checks
 * that each training MSE matches the hard-coded reference value.
 *
 * All cleanup (model, frame, scope) happens in {@code finally} blocks so that a
 * failing assertion or training error does not leak keys or leave the scope open.
 */
@Test
public void testReproducibilityAirline() {
    Frame tfr = null;
    final int N = 1;
    double[] mses = new double[N];
    Scope.enter();
    try {
        // Load data, hack frames
        tfr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip");
        // rebalance to fixed number of chunks
        Key dest = Key.make("df.rebalanced.hex");
        RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
        H2O.submitTask(rb);
        rb.join();
        // Replace the parsed frame with its rebalanced copy
        tfr.delete();
        tfr = DKV.get(dest).get();
        for (String s : new String[] { "DepTime", "ArrTime", "ActualElapsedTime", "AirTime", "ArrDelay", "DepDelay", "Cancelled", "CancellationCode", "CarrierDelay", "WeatherDelay", "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed" }) {
            tfr.remove(s).remove();
        }
        // Publish the frame after the column removals
        DKV.put(tfr);
        for (int i = 0; i < N; ++i) {
            DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
            parms._train = tfr._key;
            parms._response_column = "IsDepDelayed";
            parms._nbins = 10;
            parms._nbins_cats = 1024;
            parms._ntrees = 7;
            parms._max_depth = 10;
            parms._binomial_double_trees = false;
            parms._mtries = -1;
            parms._min_rows = 1;
            // Simulated sampling with replacement
            parms._sample_rate = 0.632f;
            parms._balance_classes = true;
            parms._seed = (1L << 32) | 2;
            DRFModel drf = null;
            try {
                // Every model is built with the same parameters and seed; all should be equal
                drf = new DRF(parms).trainModel().get();
                // JUnit convention: expected value first, actual value second
                assertEquals(parms._ntrees, drf._output._ntrees);
                mses[i] = drf._output._training_metrics.mse();
            } finally {
                // Delete the model even when the assertion above fails
                if (drf != null)
                    drf.delete();
            }
        }
    } finally {
        if (tfr != null)
            tfr.remove();
        // Always close the scope opened above, also on failure
        Scope.exit();
    }
    for (int i = 0; i < mses.length; ++i) {
        Log.info("trial: " + i + " -> MSE: " + mses[i]);
    }
    for (int i = 0; i < mses.length; ++i) {
        //check for the same result on 1 nodes and 5 nodes
        assertEquals(0.20377446328850304, mses[i], 1e-4);
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) RebalanceDataSet(water.fvec.RebalanceDataSet) Test(org.junit.Test)

Example 78 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class DRFTest method testNfoldsCVAndValidation.

/**
 * Verifies that a DRF model can be trained when both N-fold cross-validation
 * and an explicit validation frame are supplied.
 *
 * The test fails if the builder throws
 * {@code H2OModelBuilderIllegalArgumentException}; the original exception is
 * attached as the cause so its message and stack trace appear in the report.
 */
@Test
public void testNfoldsCVAndValidation() {
    Frame tfr = null, vfr = null;
    DRFModel drf = null;
    Scope.enter();
    try {
        // The same file is parsed twice: once as training and once as validation frame
        tfr = parse_test_file("smalldata/junit/weights.csv");
        vfr = parse_test_file("smalldata/junit/weights.csv");
        DKV.put(tfr);
        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
        parms._train = tfr._key;
        parms._valid = vfr._key;
        parms._response_column = "response";
        parms._min_rows = 2;
        parms._max_depth = 2;
        parms._nfolds = 2;
        parms._ntrees = 3;
        parms._seed = 11233;
        try {
            Log.info("Trying N-fold cross-validation AND Validation dataset provided.");
            drf = new DRF(parms).trainModel().get();
        } catch (H2OModelBuilderIllegalArgumentException e) {
            // Same failure type and message as Assert.fail(...), but keeps the cause
            throw new AssertionError("Should not toss H2OModelBuilderIllegalArgumentException.", e);
        }
    } finally {
        if (tfr != null)
            tfr.remove();
        if (vfr != null)
            vfr.remove();
        if (drf != null) {
            drf.deleteCrossValidationModels();
            drf.delete();
        }
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) H2OModelBuilderIllegalArgumentException(water.exceptions.H2OModelBuilderIllegalArgumentException) Test(org.junit.Test)

Example 79 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class DRFTest method testRowWeightsTiny.

/**
 * Trains a small DRF model using a row-weights column and compares the
 * training AUC, MSE and log loss against the reference values {@code _AUC},
 * {@code _MSE} and {@code _LogLoss}, which are defined outside this method.
 */
@Test
public void testRowWeightsTiny() {
    Frame tfr = null;
    DRFModel drf = null;
    Scope.enter();
    try {
        tfr = parse_test_file("smalldata/junit/weights_all_tiny.csv");
        DKV.put(tfr);
        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
        parms._train = tfr._key;
        parms._response_column = "response";
        parms._weights_column = "weight";
        parms._seed = 234;
        // in terms of weighted rows
        parms._min_rows = 0.01242;
        parms._max_depth = 2;
        parms._ntrees = 3;
        // Train the single model under test
        drf = new DRF(parms).trainModel().get();
        // OOB
        ModelMetricsBinomial mm = (ModelMetricsBinomial) drf._output._training_metrics;
        assertEquals(_AUC, mm.auc_obj()._auc, 1e-8);
        assertEquals(_MSE, mm.mse(), 1e-8);
        assertEquals(_LogLoss, mm.logloss(), 1e-6);
    } finally {
        if (tfr != null)
            tfr.remove();
        if (drf != null)
            drf.delete();
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) ModelMetricsBinomial(hex.ModelMetricsBinomial) Test(org.junit.Test)

Example 80 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class DRFTest method testStochasticDRFEquivalent.

/**
 * Trains a DRF regression model on the cars data set (response
 * {@code cylinders}, {@code name} column removed) with a 0.5 sample rate and
 * checks the training MSE against a hard-coded reference value.
 */
@Test
public void testStochasticDRFEquivalent() {
    Frame tfr = null;
    DRFModel drf = null;
    Scope.enter();
    try {
        tfr = parse_test_file("./smalldata/junit/cars.csv");
        // Drop the "name" column from the frame and delete the removed Vec
        tfr.remove("name").remove();
        // Publish the frame after the column removal
        DKV.put(tfr);
        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
        parms._train = tfr._key;
        //regression
        parms._response_column = "cylinders";
        parms._seed = 234;
        parms._min_rows = 2;
        parms._max_depth = 5;
        parms._ntrees = 5;
        parms._mtries = 3;
        parms._sample_rate = 0.5f;
        // Train the single model under test
        drf = new DRF(parms).trainModel().get();
        ModelMetricsRegression mm = (ModelMetricsRegression) drf._output._training_metrics;
        assertEquals(0.12358322821934015, mm.mse(), 1e-4);
    } finally {
        if (tfr != null)
            tfr.remove();
        if (drf != null)
            drf.delete();
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) SplitFrame(hex.SplitFrame) ModelMetricsRegression(hex.ModelMetricsRegression) Test(org.junit.Test)

Aggregations

Frame (water.fvec.Frame)782 Test (org.junit.Test)435 Vec (water.fvec.Vec)215 ValFrame (water.rapids.vals.ValFrame)132 NFSFileVec (water.fvec.NFSFileVec)66 Val (water.rapids.Val)65 SplitFrame (hex.SplitFrame)59 Key (water.Key)56 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)54 Chunk (water.fvec.Chunk)50 NewChunk (water.fvec.NewChunk)37 MRTask (water.MRTask)33 ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame)31 Ignore (org.junit.Ignore)28 Random (java.util.Random)26 File (java.io.File)25 BufferedString (water.parser.BufferedString)21 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)19 HashMap (java.util.HashMap)17 hex (hex)16