Example 21 with NFSFileVec

Use of water.fvec.NFSFileVec in project h2o-2 by h2oai, in class MRThrow, method testInvokeThrow.

// ---
// Map h2o.jar - a multi-megabyte file - into Arraylets.
// Run a distributed byte histogram. Throw an exception in *some* map call,
// and make sure it's forwarded to the invoking caller.
@Test
public void testInvokeThrow() {
    File file = find_test_file("target/h2o.jar");
    Key h2okey = load_test_file(file);
    NFSFileVec nfs = DKV.get(h2okey).get();
    try {
        for (int i = 0; i < H2O.CLOUD._memary.length; ++i) {
            ByteHistoThrow bh = new ByteHistoThrow();
            bh._throwAt = H2O.CLOUD._memary[i].toString();
            try {
                // doAll() should throw a DistributedException wrapped in a RuntimeException
                bh.doAll(nfs);
                fail("should've thrown");
            } catch (RuntimeException e) {
                assertTrue(e.getMessage().contains("test"));
            } catch (Throwable ex) {
                ex.printStackTrace();
                fail("Expected RuntimeException, got " + ex.toString());
            }
        }
    } finally {
        // Once a map() call fails, other in-flight map calls may still lazily load data; give them a moment to finish before we call delete()
        try {
            Thread.sleep(100);
        } catch (InterruptedException ignore) {
        }
        Lockable.delete(h2okey);
    }
}
Also used : NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Test(org.junit.Test)
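
The pattern this test exercises, an MRTask computing a distributed byte histogram over an NFSFileVec, looks roughly like the sketch below. It is written against the h2o-3 API (water.MRTask; in h2o-2 the equivalent base class is MRTask2), the ByteHisto class name is made up for illustration, and a running H2O cloud plus the file path are assumptions, not part of the test above.

import java.io.File;
import water.MRTask;
import water.fvec.Chunk;
import water.fvec.NFSFileVec;

// Minimal byte-histogram task: each map() counts the bytes of one chunk,
// reduce() merges the per-chunk counts across the cloud.
class ByteHisto extends MRTask<ByteHisto> {
    long[] _counts;
    @Override public void map(Chunk c) {
        _counts = new long[256];
        for (int i = 0; i < c._len; i++)
            _counts[(int) c.at8(i) & 0xFF]++;   // NFSFileVec chunks hold the raw file bytes
    }
    @Override public void reduce(ByteHisto other) {
        if (other._counts == null) return;
        if (_counts == null) { _counts = other._counts; return; }
        for (int i = 0; i < _counts.length; i++) _counts[i] += other._counts[i];
    }
}

// Usage (assumes a running H2O cloud and that the file exists):
// NFSFileVec nfs = NFSFileVec.make(new File("target/h2o.jar"));
// long[] hist = new ByteHisto().doAll(nfs)._counts;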

Example 22 with NFSFileVec

Use of water.fvec.NFSFileVec in project h2o-2 by h2oai, in class MRThrow, method testGetThrow.

@Test
public void testGetThrow() {
    File file = find_test_file("target/h2o.jar");
    Key h2okey = load_test_file(file);
    NFSFileVec nfs = DKV.get(h2okey).get();
    try {
        for (int i = 0; i < H2O.CLOUD._memary.length; ++i) {
            ByteHistoThrow bh = new ByteHistoThrow();
            bh._throwAt = H2O.CLOUD._memary[i].toString();
            try {
                // dfork().get() should throw a DistributedException wrapped in an ExecutionException
                bh.dfork(nfs).get();
                fail("should've thrown");
            } catch (ExecutionException e) {
                assertTrue(e.getMessage().contains("test"));
            } catch (Throwable ex) {
                ex.printStackTrace();
                fail("Expected ExecutionException, got " + ex.toString());
            }
        }
    } finally {
        // Once a map() call fails, other in-flight map calls may still lazily load data; give them a moment to finish before we call delete()
        try {
            Thread.sleep(100);
        } catch (InterruptedException ignore) {
        }
        Lockable.delete(h2okey);
    }
}
Also used : NFSFileVec(water.fvec.NFSFileVec) ExecutionException(java.util.concurrent.ExecutionException) File(java.io.File) Test(org.junit.Test)
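
Relative to testInvokeThrow above, the only difference is the invocation style: doAll(nfs) runs the task synchronously and surfaces a remote failure as a RuntimeException wrapping the DistributedException, while dfork(nfs).get() forks the task and blocks later, so the same failure arrives as an ExecutionException from the Future-style get(). As a hedged h2o-3 analogue using the ByteHisto sketch from Example 21 (not the test's own code), the asynchronous form would look like:

// Fork the task across the cloud and collect the result later (h2o-3 API;
// the h2o-2 test above uses ForkJoinTask.get(), which wraps failures in ExecutionException).
ByteHisto bh = new ByteHisto().dfork(nfs);
// ... other work ...
long[] hist = bh.getResult()._counts;   // blocks until the distributed task completes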

Example 23 with NFSFileVec

Use of water.fvec.NFSFileVec in project h2o-3 by h2oai, in class DeepLearningAutoEncoderCategoricalTest, method run.

@Test
public void run() {
    long seed = 0xDECAF;
    NFSFileVec nfs = TestUtil.makeNfsFileVec(PATH);
    Frame train = ParseDataset.parse(Key.make("train.hex"), nfs._key);
    DeepLearningParameters p = new DeepLearningParameters();
    p._train = train._key;
    p._autoencoder = true;
    p._response_column = train.names()[train.names().length - 1];
    p._seed = seed;
    p._hidden = new int[] { 10, 5, 3 };
    p._adaptive_rate = true;
    //    String[] n = train.names();
    //    p._ignored_columns = new String[]{n[0],n[1],n[2],n[3],n[6],n[7],n[8],n[10]}; //Optional: ignore all categoricals
    //    p._ignored_columns = new String[]{train.names()[4], train.names()[5], train.names()[9]}; //Optional: ignore all numericals
    p._l1 = 1e-4;
    p._activation = DeepLearningParameters.Activation.Tanh;
    p._max_w2 = 10;
    p._train_samples_per_iteration = -1;
    p._loss = DeepLearningParameters.Loss.Huber;
    p._epochs = 0.2;
    p._force_load_balance = true;
    p._score_training_samples = 0;
    p._score_validation_samples = 0;
    p._reproducible = true;
    DeepLearning dl = new DeepLearning(p);
    DeepLearningModel mymodel = dl.trainModel().get();
    // Verification of results
    StringBuilder sb = new StringBuilder();
    sb.append("Verifying results.\n");
    sb.append("Reported mean reconstruction error: " + mymodel.mse() + "\n");
    // Training data
    // Reconstruct data using the same helper functions and verify that self-reported MSE agrees
    final Frame rec = mymodel.scoreAutoEncoder(train, Key.make(), true);
    sb.append("Reconstruction error per feature: " + rec.toString() + "\n");
    rec.remove();
    final Frame l2 = mymodel.scoreAutoEncoder(train, Key.make(), false);
    final Vec l2vec = l2.anyVec();
    sb.append("Actual   mean reconstruction error: " + l2vec.mean() + "\n");
    // print stats and potential outliers
    double quantile = 1 - 5. / train.numRows();
    sb.append("The following training points are reconstructed with an error above the " + quantile * 100 + "-th percentile - potential \"outliers\" in testing data.\n");
    double thresh = mymodel.calcOutlierThreshold(l2vec, quantile);
    for (long i = 0; i < l2vec.length(); i++) {
        if (l2vec.at(i) > thresh) {
            sb.append(String.format("row %d : l2vec error = %5f\n", i, l2vec.at(i)));
        }
    }
    Log.info(sb.toString());
    Assert.assertEquals(l2vec.mean(), mymodel.mse(), 1e-8 * mymodel.mse());
    // Create reconstruction
    Log.info("Creating full reconstruction.");
    final Frame recon_train = mymodel.score(train);
    Assert.assertTrue(mymodel.testJavaScoring(train, recon_train, 1e-5));
    Frame df1 = mymodel.scoreDeepFeatures(train, 0);
    Assert.assertTrue(df1.numCols() == 10);
    Assert.assertTrue(df1.numRows() == train.numRows());
    df1.delete();
    Frame df2 = mymodel.scoreDeepFeatures(train, 1);
    Assert.assertTrue(df2.numCols() == 5);
    Assert.assertTrue(df2.numRows() == train.numRows());
    df2.delete();
    Frame df3 = mymodel.scoreDeepFeatures(train, 2);
    Assert.assertTrue(df3.numCols() == 3);
    Assert.assertTrue(df3.numRows() == train.numRows());
    df3.delete();
    // cleanup
    recon_train.delete();
    train.delete();
    mymodel.delete();
    l2.delete();
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) DeepLearningParameters(hex.deeplearning.DeepLearningModel.DeepLearningParameters) Test(org.junit.Test)
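
Examples 23 onward all start from the h2o-3 flow of mapping a file into an NFSFileVec and parsing it into a Frame before training. A minimal sketch of just that load-and-parse step (a running H2O cloud is assumed; the class name, method name, and path are illustrative, not taken from the test above):

import java.io.File;
import water.Key;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.parser.ParseDataset;

public class ParseNfsExample {
    // Maps the file into the DKV without copying it, then parses the raw bytes into a columnar Frame.
    static Frame parseCsv(String path) {
        NFSFileVec nfs = NFSFileVec.make(new File(path));
        return ParseDataset.parse(Key.make(path + ".hex"), nfs._key);
    }
    // Callers own the returned Frame and should eventually call frame.delete() to free it from the DKV.
}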

Example 24 with NFSFileVec

Use of water.fvec.NFSFileVec in project h2o-3 by h2oai, in class DeepWaterAbstractIntegrationTest, method MNISTSparse.

@Test
public void MNISTSparse() {
    Frame tr = null;
    Frame va = null;
    DeepWaterModel m = null;
    try {
        DeepWaterParameters p = new DeepWaterParameters();
        File file = FileUtils.locateFile("bigdata/laptop/mnist/train.csv.gz");
        File valid = FileUtils.locateFile("bigdata/laptop/mnist/test.csv.gz");
        if (file != null) {
            p._response_column = "C785";
            NFSFileVec trainfv = NFSFileVec.make(file);
            tr = ParseDataset.parse(Key.make(), trainfv._key);
            NFSFileVec validfv = NFSFileVec.make(valid);
            va = ParseDataset.parse(Key.make(), validfv._key);
            for (String col : new String[] { p._response_column }) {
                Vec v = tr.remove(col);
                tr.add(col, v.toCategoricalVec());
                v.remove();
                v = va.remove(col);
                va.add(col, v.toCategoricalVec());
                v.remove();
            }
            DKV.put(tr);
            DKV.put(va);
            p._backend = getBackend();
            p._train = tr._key;
            p._valid = va._key;
            p._hidden = new int[] { 500, 500 };
            p._sparse = true;
            DeepWater j = new DeepWater(p);
            m = j.trainModel().get();
            Assert.assertTrue(((ModelMetricsMultinomial) (m._output._validation_metrics)).mean_per_class_error() < 0.05);
        }
    } finally {
        if (tr != null)
            tr.remove();
        if (va != null)
            va.remove();
        if (m != null)
            m.remove();
    }
}
Also used : Frame(water.fvec.Frame) ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec) ModelMetricsMultinomial(hex.ModelMetricsMultinomial)
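
The per-column loop in MNISTSparse converts the response from numeric to categorical in place so the model treats it as a class label. A hedged sketch of that pattern on its own (h2o-3 API; the class and method names are illustrative):

import water.DKV;
import water.fvec.Frame;
import water.fvec.Vec;

public class CategoricalHelper {
    // Replace a column with its categorical (enum) version and publish the updated Frame cloud-wide.
    static void toCategoricalInPlace(Frame fr, String col) {
        Vec v = fr.remove(col);            // detach the original Vec
        fr.add(col, v.toCategoricalVec()); // re-attach a categorical copy under the same name
        v.remove();                        // free the old Vec's chunks
        DKV.put(fr);                       // make the modified Frame visible to all nodes
    }
}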

Example 25 with NFSFileVec

Use of water.fvec.NFSFileVec in project h2o-3 by h2oai, in class DeepWaterAbstractIntegrationTest, method Airlines.

@Test
public void Airlines() {
    Frame tr = null;
    DeepWaterModel m = null;
    Frame[] splits = null;
    try {
        DeepWaterParameters p = new DeepWaterParameters();
        File file = FileUtils.locateFile("smalldata/airlines/allyears2k_headers.zip");
        if (file != null) {
            p._response_column = "IsDepDelayed";
            p._ignored_columns = new String[] { "DepTime", "ArrTime", "Cancelled", "CancellationCode", "Diverted", "CarrierDelay", "WeatherDelay", "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed" };
            NFSFileVec trainfv = NFSFileVec.make(file);
            tr = ParseDataset.parse(Key.make(), trainfv._key);
            for (String col : new String[] { p._response_column, "UniqueCarrier", "Origin", "Dest" }) {
                Vec v = tr.remove(col);
                tr.add(col, v.toCategoricalVec());
                v.remove();
            }
            DKV.put(tr);
            double[] ratios = ard(0.5, 0.5);
            Key[] keys = aro(Key.make("test.hex"), Key.make("train.hex"));
            splits = ShuffleSplitFrame.shuffleSplitFrame(tr, keys, ratios, 42);
            p._backend = getBackend();
            p._train = keys[0];
            p._valid = keys[1];
            DeepWater j = new DeepWater(p);
            m = j.trainModel().get();
            Assert.assertTrue(((ModelMetricsBinomial) (m._output._validation_metrics)).auc() > 0.65);
        }
    } finally {
        if (tr != null)
            tr.remove();
        if (m != null)
            m.remove();
        if (splits != null)
            for (Frame s : splits) s.remove();
    }
}
Also used : Frame(water.fvec.Frame) ShuffleSplitFrame(hex.splitframe.ShuffleSplitFrame) NFSFileVec(water.fvec.NFSFileVec) ModelMetricsBinomial(hex.ModelMetricsBinomial) Vec(water.fvec.Vec)
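
The Airlines test splits the parsed Frame 50/50 with ShuffleSplitFrame before training. A minimal sketch of that call in isolation (h2o-3 API; the class and method names are illustrative, and `parsed` is assumed to be an already-parsed Frame):

import water.Key;
import water.fvec.Frame;
import hex.splitframe.ShuffleSplitFrame;

public class SplitExample {
    // Shuffle and split into two Frames registered under the given keys;
    // the ratios must sum to 1 and the seed makes the shuffle reproducible.
    static Frame[] split5050(Frame parsed) {
        Key[] keys = new Key[] { Key.make("train.hex"), Key.make("test.hex") };
        return ShuffleSplitFrame.shuffleSplitFrame(parsed, keys, new double[] { 0.5, 0.5 }, 42);
    }
    // Callers should eventually call remove() on each returned split.
}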

Aggregations

NFSFileVec (water.fvec.NFSFileVec): 27 usages
Frame (water.fvec.Frame): 21 usages
Test (org.junit.Test): 13 usages
File (java.io.File): 12 usages
DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 9 usages
Vec (water.fvec.Vec): 8 usages
ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 5 usages
Key (water.Key): 5 usages
ModelMetricsMultinomial (hex.ModelMetricsMultinomial): 3 usages
FrameSplitter (hex.FrameSplitter): 2 usages
ModelMetricsBinomial (hex.ModelMetricsBinomial): 2 usages
Random (java.util.Random): 2 usages
ExecutionException (java.util.concurrent.ExecutionException): 2 usages
DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 2 usages
Ignore (org.junit.Ignore): 2 usages
FrameUtils (water.util.FrameUtils): 2 usages
TwoDimTable (water.util.TwoDimTable): 2 usages
ConfusionMatrix (hex.ConfusionMatrix): 1 usage
SplitFrame (hex.SplitFrame): 1 usage
ClassSamplingMethod (hex.deeplearning.DeepLearningModel.DeepLearningParameters.ClassSamplingMethod): 1 usage