Search in sources :

Example 51 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AggregatorTest method testChunks.

/**
 * Trains an Aggregator on the covtype 20k sample, then trains it again once per chunk
 * count in {1, 2, 5, 10, 50, 100} (rebalancing the frame to that count first) and asserts
 * that every run produces exactly as many exemplars as the baseline run.
 */
@Test
public void testChunks() {
    Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");
    AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
    parms._train = frame._key;
    parms._target_num_exemplars = 137;
    parms._rel_tol_num_exemplars = 0.05;
    long start = System.currentTimeMillis();
    // 0.418
    AggregatorModel agg = new Aggregator(parms).trainModel().get();
    System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
    agg.checkConsistency();
    Frame output = agg._output._output_frame.get();
    checkNumExemplars(agg);
    output.remove();
    agg.remove();
    for (int i : new int[] { 1, 2, 5, 10, 50, 100 }) {
        // Materialize a copy of the frame rebalanced to i chunks under a fresh key.
        Key key = Key.make();
        RebalanceDataSet rb = new RebalanceDataSet(frame, key, i);
        H2O.submitTask(rb);
        rb.join();
        Frame rebalanced = DKV.get(key).get();
        parms = new AggregatorModel.AggregatorParameters();
        // NOTE(review): training still points at the original frame, so the rebalanced copy
        // is created and deleted without ever being aggregated. Switching this to
        // rebalanced._key may change the exemplar counts -- confirm the intent before changing.
        parms._train = frame._key;
        parms._target_num_exemplars = 137;
        parms._rel_tol_num_exemplars = 0.05;
        start = System.currentTimeMillis();
        // 0.373 0.504 0.357 0.454 0.368 0.355
        AggregatorModel agg2 = new Aggregator(parms).trainModel().get();
        System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
        agg2.checkConsistency();
        Log.info("Number of exemplars for " + i + " chunks: " + agg2._exemplars.length);
        rebalanced.delete();
        // agg.remove() was already called above; only the _exemplars field of the baseline
        // object is read here (presumably still populated in memory -- the original test
        // relied on the same access).
        Assert.assertEquals(agg._exemplars.length, agg2._exemplars.length);
        output = agg2._output._output_frame.get();
        output.remove();
        // Validate the model trained in this iteration, not the baseline checked before the loop.
        checkNumExemplars(agg2);
        agg2.remove();
    }
    frame.delete();
}
Also used : CreateFrame(hex.CreateFrame) Frame(water.fvec.Frame) RebalanceDataSet(water.fvec.RebalanceDataSet) Aggregator(hex.aggregator.Aggregator) AggregatorModel(hex.aggregator.AggregatorModel) Test(org.junit.Test)

Example 52 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AggregatorTest method testAggregator.

/**
 * Generates a synthetic 100000-row, 2-column frame (fixed seed 1234), trains an Aggregator
 * on it with the requested exemplar target, prints the first rows of the exemplar frame and
 * runs the exemplar-count check before releasing everything it created.
 *
 * @param max value assigned to the Aggregator's {@code _target_num_exemplars} parameter
 */
public void testAggregator(int max) {
    // Describe the synthetic data set.
    CreateFrame frameSpec = new CreateFrame();
    frameSpec.rows = 100000;
    frameSpec.cols = 2;
    frameSpec.categorical_fraction = 0.1;
    frameSpec.integer_fraction = 0.3;
    frameSpec.real_range = 100;
    frameSpec.integer_range = 100;
    frameSpec.seed = 1234;
    Frame synthetic = frameSpec.execImpl().get();

    AggregatorModel.AggregatorParameters settings = new AggregatorModel.AggregatorParameters();
    settings._train = synthetic._key;
    settings._target_num_exemplars = max;

    // Time only the model build itself.
    long startedAt = System.currentTimeMillis();
    AggregatorModel model = new Aggregator(settings).trainModel().get();
    double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.;
    System.out.println("AggregatorModel finished in: " + elapsedSeconds + " seconds");
    model.checkConsistency();

    Frame exemplars = model._output._output_frame.get();
    System.out.println(exemplars.toTwoDimTable(0, 10));

    // Cleanup order is kept as-is: input frame first, then the check, then outputs.
    synthetic.delete();
    checkNumExemplars(model);
    exemplars.remove();
    model.remove();
}
Also used : CreateFrame(hex.CreateFrame) Frame(water.fvec.Frame) Aggregator(hex.aggregator.Aggregator) AggregatorModel(hex.aggregator.AggregatorModel)

Example 53 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class AggregatorTest method testCovtype.

/**
 * Trains an Aggregator on the covtype 20k sample with a target of 500 exemplars and a
 * relative tolerance of 0.05, logs the resulting exemplar frame and runs the
 * exemplar-count check.
 */
@Test
public void testCovtype() {
    Frame covtype = parse_test_file("smalldata/covtype/covtype.20k.data");

    AggregatorModel.AggregatorParameters settings = new AggregatorModel.AggregatorParameters();
    settings._train = covtype._key;
    settings._target_num_exemplars = 500;
    settings._rel_tol_num_exemplars = 0.05;

    long startedAt = System.currentTimeMillis();
    // 0.179
    AggregatorModel model = new Aggregator(settings).trainModel().get();
    double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.;
    System.out.println("AggregatorModel finished in: " + elapsedSeconds + " seconds");
    model.checkConsistency();

    // The training frame is released as soon as the model has been built.
    covtype.delete();

    Frame exemplars = model._output._output_frame.get();
    String rendered = exemplars.toString();
    Log.info("Exemplars: " + rendered);
    exemplars.remove();

    checkNumExemplars(model);
    model.remove();
}
Also used : CreateFrame(hex.CreateFrame) Frame(water.fvec.Frame) Aggregator(hex.aggregator.Aggregator) AggregatorModel(hex.aggregator.AggregatorModel) Test(org.junit.Test)

Example 54 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testNACategorical.

// PUBDEV-2822
/**
 * Trains a one-tree GBM (learn rate 1, min rows 1) on a six-row inline CSV whose first
 * field is empty in the first and last rows, scores the training frame, and checks that
 * the Java-scoring cross-check passes and that each prediction matches the value in the
 * second CSV column to within 1e-6.
 */
@Test
public void testNACategorical() {
    String xy = ",0\nA,0\nB,0\nA,0\nD,-10\n,0";
    // Expected prediction per row; mirrors the second column of the CSV above.
    double[] expected = { 0, 0, 0, 0, -10, 0 };
    Key tr = Key.make("train");
    Frame df = null;
    Frame preds = null;
    GBMModel gbm = null;
    try {
        df = ParseDataset.parse(tr, makeByteVec(Key.make("xy"), xy));
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tr;
        parms._response_column = "C2";
        parms._min_rows = 1;
        parms._learn_rate = 1;
        parms._ntrees = 1;
        GBM job = new GBM(parms);
        gbm = job.trainModel().get();
        preds = gbm.score(df);
        Log.info(df);
        Log.info(preds);
        Assert.assertTrue(gbm.testJavaScoring(df, preds, 1e-15));
        for (int row = 0; row < expected.length; row++) {
            // The message identifies the offending row when a prediction is off.
            Assert.assertEquals("prediction for row " + row, expected[row], preds.vec(0).at(row), 1e-6);
        }
    } finally {
        // Release whatever was created even when training or an assertion fails,
        // so a failing run does not leave these objects behind.
        if (preds != null)
            preds.remove();
        if (gbm != null)
            gbm.remove();
        if (df != null)
            df.remove();
    }
}
Also used : Frame(water.fvec.Frame) Test(org.junit.Test)

Example 55 with Frame

use of water.fvec.Frame in project h2o-3 by h2oai.

the class GBMTest method testGBMTrainTest.

/**
 * Test-on-Train.  Slow test, needed to build a good model.
 *
 * Trains a 5-tree GBM on the ecology model data (with the "Site" column dropped and the
 * "Angaus" response converted to categorical), then checks the AUC and the default
 * confusion matrix computed on the ecology evaluation frame.
 */
@Test
public void testGBMTrainTest() {
    GBMModel gbm = null;
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    try {
        Scope.enter();
        parms._valid = parse_test_file("smalldata/gbm_test/ecology_eval.csv")._key;
        Frame train = parse_test_file("smalldata/gbm_test/ecology_model.csv");
        // Remove unique ID
        train.remove("Site").remove();
        // Convert response to categorical
        int ci = train.find("Angaus");
        Scope.track(train.replace(ci, train.vecs()[ci].toCategoricalVec()));
        // Update frame after hacking it
        DKV.put(train);
        parms._train = train._key;
        // Train on the outcome
        parms._response_column = "Angaus";
        parms._ntrees = 5;
        parms._max_depth = 5;
        parms._min_rows = 10;
        parms._nbins = 100;
        parms._learn_rate = .2f;
        parms._distribution = DistributionFamily.multinomial;
        gbm = new GBM(parms).trainModel().get();
        hex.ModelMetricsBinomial mm = hex.ModelMetricsBinomial.getFromDKV(gbm, parms.valid());
        double auc = mm._auc._auc;
        // Sanely good model
        Assert.assertTrue(0.83 <= auc && auc < 0.87);
        double[][] cm = mm._auc.defaultCM();
        Assert.assertArrayEquals(ard(ard(349, 44), ard(43, 64)), cm);
    } finally {
        // _train / _valid are only assigned inside the try block; if setup failed before
        // that point they may still be unset. Guard each one so cleanup cannot throw a
        // NullPointerException that would hide the real failure and skip Scope.exit().
        if (parms._train != null)
            parms._train.remove();
        if (parms._valid != null)
            parms._valid.remove();
        if (gbm != null)
            gbm.delete();
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) hex(hex) Test(org.junit.Test)

Aggregations

Frame (water.fvec.Frame) 782, Test (org.junit.Test) 435, Vec (water.fvec.Vec) 215, ValFrame (water.rapids.vals.ValFrame) 132, NFSFileVec (water.fvec.NFSFileVec) 66, Val (water.rapids.Val) 65, SplitFrame (hex.SplitFrame) 59, Key (water.Key) 56, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters) 54, Chunk (water.fvec.Chunk) 50, NewChunk (water.fvec.NewChunk) 37, MRTask (water.MRTask) 33, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame) 31, Ignore (org.junit.Ignore) 28, Random (java.util.Random) 26, File (java.io.File) 25, BufferedString (water.parser.BufferedString) 21, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException) 19, HashMap (java.util.HashMap) 17, hex (hex) 16