Use of water.fvec.NFSFileVec in project h2o-2 by h2oai.
Class MRThrow, method testInvokeThrow.
/**
 * Loads h2o.jar (a multi-megabyte file) as an {@code NFSFileVec} and runs the
 * distributed byte histogram over it once per cloud member. On each pass
 * {@code _throwAt} is set to that member's name (presumably the node whose
 * {@code map} call throws - confirm against {@code ByteHistoThrow}). The test
 * checks that the exception raised in *some* map call is forwarded to the
 * blocking {@code doAll} caller as a {@code RuntimeException} whose message
 * contains "test".
 */
@Test
public void testInvokeThrow() {
  File file = find_test_file("target/h2o.jar");
  Key h2okey = load_test_file(file);
  NFSFileVec nfs = DKV.get(h2okey).get();
  try {
    for (int i = 0; i < H2O.CLOUD._memary.length; ++i) {
      ByteHistoThrow bh = new ByteHistoThrow();
      bh._throwAt = H2O.CLOUD._memary[i].toString();
      try {
        // doAll should throw DistributedException wrapped up in RuntimeException
        bh.doAll(nfs);
        fail("should've thrown");
      } catch (RuntimeException e) {
        // A message-less exception must fail the assertion, not raise an NPE.
        String msg = e.getMessage();
        assertTrue(msg != null && msg.contains("test"));
      } catch (AssertionError ae) {
        // Raised by fail() above: let it through untouched so the report says
        // "should've thrown" rather than "Expected RuntimeException, got AssertionError".
        throw ae;
      } catch (Throwable ex) {
        ex.printStackTrace();
        fail("Expected RuntimeException, got " + ex.toString());
      }
    }
  } finally {
    // Once one map() call fails, other map calls may still be running and can
    // lazily load data; wait briefly so that does not happen after delete().
    boolean interrupted = false;
    try {
      Thread.sleep(100);
    } catch (InterruptedException ie) {
      // Still perform the cleanup, but remember the interrupt for the caller.
      interrupted = true;
    }
    try {
      Lockable.delete(h2okey);
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
}
Use of water.fvec.NFSFileVec in project h2o-2 by h2oai.
Class MRThrow, method testGetThrow.
/**
 * Asynchronous counterpart of the blocking-invoke test: loads h2o.jar as an
 * {@code NFSFileVec}, forks the byte histogram once per cloud member with
 * {@code _throwAt} set to that member's name, and checks that {@code get()} on
 * the forked task surfaces the failure as an {@code ExecutionException} whose
 * message contains "test".
 */
@Test
public void testGetThrow() {
  File file = find_test_file("target/h2o.jar");
  Key h2okey = load_test_file(file);
  NFSFileVec nfs = DKV.get(h2okey).get();
  try {
    for (int i = 0; i < H2O.CLOUD._memary.length; ++i) {
      ByteHistoThrow bh = new ByteHistoThrow();
      bh._throwAt = H2O.CLOUD._memary[i].toString();
      try {
        // get() should throw the DistributedException wrapped up in an ExecutionException
        bh.dfork(nfs).get();
        fail("should've thrown");
      } catch (ExecutionException e) {
        // A message-less exception must fail the assertion, not raise an NPE.
        String msg = e.getMessage();
        assertTrue(msg != null && msg.contains("test"));
      } catch (AssertionError ae) {
        // Raised by fail() above: let it through untouched so the report says
        // "should've thrown" rather than "Expected ExecutionException, got AssertionError".
        throw ae;
      } catch (Throwable ex) {
        ex.printStackTrace();
        fail("Expected ExecutionException, got " + ex.toString());
      }
    }
  } finally {
    // Once one map() call fails, other map calls may still be running and can
    // lazily load data; wait briefly so that does not happen after delete().
    boolean interrupted = false;
    try {
      Thread.sleep(100);
    } catch (InterruptedException ie) {
      // Still perform the cleanup, but remember the interrupt for the caller.
      interrupted = true;
    }
    try {
      Lockable.delete(h2okey);
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
}
Use of water.fvec.NFSFileVec in project h2o-3 by h2oai.
Class DeepLearningAutoEncoderCategoricalTest, method run.
/**
 * Trains a deep-learning autoencoder (hidden layers 10-5-3) on the data set at
 * {@code PATH} and verifies that:
 * <ul>
 *   <li>the mean of the per-row reconstruction error matches the model's
 *       self-reported MSE,</li>
 *   <li>the generated Java scoring code agrees with in-cluster scoring,</li>
 *   <li>the deep-feature frame of every hidden layer has as many columns as
 *       that layer has units and as many rows as the training frame.</li>
 * </ul>
 * All frames and the model are released in {@code finally} blocks so that a
 * failed assertion does not leave keys behind in the DKV.
 */
@Test
public void run() {
  long seed = 0xDECAF;
  // Hidden-layer widths; also the expected column count of each deep-feature frame.
  final int[] hidden = { 10, 5, 3 };
  Frame train = null;
  Frame rec = null;
  Frame l2 = null;
  Frame reconTrain = null;
  DeepLearningModel mymodel = null;
  try {
    NFSFileVec nfs = TestUtil.makeNfsFileVec(PATH);
    train = ParseDataset.parse(Key.make("train.hex"), nfs._key);
    DeepLearningParameters p = new DeepLearningParameters();
    p._train = train._key;
    p._autoencoder = true;
    p._response_column = train.names()[train.names().length - 1];
    p._seed = seed;
    p._hidden = hidden.clone();
    p._adaptive_rate = true;
    // String[] n = train.names();
    // p._ignored_columns = new String[]{n[0],n[1],n[2],n[3],n[6],n[7],n[8],n[10]}; //Optional: ignore all categoricals
    // p._ignored_columns = new String[]{train.names()[4], train.names()[5], train.names()[9]}; //Optional: ignore all numericals
    p._l1 = 1e-4;
    p._activation = DeepLearningParameters.Activation.Tanh;
    p._max_w2 = 10;
    p._train_samples_per_iteration = -1;
    p._loss = DeepLearningParameters.Loss.Huber;
    p._epochs = 0.2;
    p._force_load_balance = true;
    p._score_training_samples = 0;
    p._score_validation_samples = 0;
    p._reproducible = true;
    DeepLearning dl = new DeepLearning(p);
    mymodel = dl.trainModel().get();

    // Verification of results
    StringBuilder sb = new StringBuilder();
    sb.append("Verifying results.\n");
    sb.append("Reported mean reconstruction error: " + mymodel.mse() + "\n");

    // Training data
    // Reconstruct data using the same helper functions and verify that self-reported MSE agrees
    rec = mymodel.scoreAutoEncoder(train, Key.make(), true);
    sb.append("Reconstruction error per feature: " + rec.toString() + "\n");
    rec.remove();
    rec = null; // already released; keeps the finally block from removing it twice

    l2 = mymodel.scoreAutoEncoder(train, Key.make(), false);
    final Vec l2vec = l2.anyVec();
    sb.append("Actual mean reconstruction error: " + l2vec.mean() + "\n");

    // print stats and potential outliers
    double quantile = 1 - 5. / train.numRows();
    sb.append("The following training points are reconstructed with an error above the " + quantile * 100 + "-th percentile - potential \"outliers\" in testing data.\n");
    double thresh = mymodel.calcOutlierThreshold(l2vec, quantile);
    for (long i = 0; i < l2vec.length(); i++) {
      if (l2vec.at(i) > thresh) {
        sb.append(String.format("row %d : l2vec error = %5f\n", i, l2vec.at(i)));
      }
    }
    Log.info(sb.toString());
    Assert.assertEquals(l2vec.mean(), mymodel.mse(), 1e-8 * mymodel.mse());

    // Create reconstruction
    Log.info("Creating full reconstruction.");
    reconTrain = mymodel.score(train);
    Assert.assertTrue(mymodel.testJavaScoring(train, reconTrain, 1e-5));

    // Deep features of hidden layer k must be hidden[k] columns wide, one row per training row.
    for (int layer = 0; layer < hidden.length; layer++) {
      Frame deepFeatures = mymodel.scoreDeepFeatures(train, layer);
      try {
        Assert.assertEquals(hidden[layer], deepFeatures.numCols());
        Assert.assertEquals(train.numRows(), deepFeatures.numRows());
      } finally {
        deepFeatures.delete();
      }
    }
  } finally {
    // cleanup - runs even when an assertion above fails
    if (rec != null) {
      rec.remove();
    }
    if (reconTrain != null) {
      reconTrain.delete();
    }
    if (train != null) {
      train.delete();
    }
    if (mymodel != null) {
      mymodel.delete();
    }
    if (l2 != null) {
      l2.delete();
    }
  }
}
Use of water.fvec.NFSFileVec in project h2o-3 by h2oai.
Class DeepWaterAbstractIntegrationTest, method MNISTSparse.
/**
 * Trains a sparse DeepWater model with two hidden layers of 500 units on the
 * MNIST training file and asserts a mean per-class error below 5% on the MNIST
 * test file, which is used as the validation frame.
 *
 * <p>The test body is skipped (and the test passes) when either data file
 * cannot be located.
 */
@Test
public void MNISTSparse() {
  Frame tr = null;
  Frame va = null;
  DeepWaterModel m = null;
  try {
    DeepWaterParameters p = new DeepWaterParameters();
    File file = FileUtils.locateFile("bigdata/laptop/mnist/train.csv.gz");
    File valid = FileUtils.locateFile("bigdata/laptop/mnist/test.csv.gz");
    // Both files are needed: a missing validation file is skipped the same way
    // as a missing training file instead of crashing in NFSFileVec.make(null).
    if (file != null && valid != null) {
      p._response_column = "C785";
      NFSFileVec trainfv = NFSFileVec.make(file);
      tr = ParseDataset.parse(Key.make(), trainfv._key);
      NFSFileVec validfv = NFSFileVec.make(valid);
      va = ParseDataset.parse(Key.make(), validfv._key);
      // Replace the response column with its categorical version in both
      // frames, removing the original Vec afterwards.
      for (String col : new String[] { p._response_column }) {
        Vec v = tr.remove(col);
        tr.add(col, v.toCategoricalVec());
        v.remove();
        v = va.remove(col);
        va.add(col, v.toCategoricalVec());
        v.remove();
      }
      // Publish the modified frames so the builder sees the new columns.
      DKV.put(tr);
      DKV.put(va);
      p._backend = getBackend();
      p._train = tr._key;
      p._valid = va._key;
      p._hidden = new int[] { 500, 500 };
      p._sparse = true;
      DeepWater j = new DeepWater(p);
      m = j.trainModel().get();
      Assert.assertTrue(((ModelMetricsMultinomial) (m._output._validation_metrics)).mean_per_class_error() < 0.05);
    }
  } finally {
    if (tr != null) {
      tr.remove();
    }
    if (va != null) {
      va.remove();
    }
    if (m != null) {
      m.remove();
    }
  }
}
Use of water.fvec.NFSFileVec in project h2o-3 by h2oai.
Class DeepWaterAbstractIntegrationTest, method Airlines.
/**
 * Trains a DeepWater model that predicts {@code IsDepDelayed} on the
 * airlines sample data and asserts a validation AUC above 0.65.
 *
 * <p>The parsed frame is split 50/50 with seed 42; the first split is used for
 * training and the second for validation. Nothing is trained (and the test
 * passes) when the data file cannot be located.
 */
@Test
public void Airlines() {
  Frame airlines = null;
  DeepWaterModel model = null;
  Frame[] parts = null;
  try {
    DeepWaterParameters params = new DeepWaterParameters();
    File zip = FileUtils.locateFile("smalldata/airlines/allyears2k_headers.zip");
    if (zip == null) {
      // Data set not available; the finally block has nothing to release.
      return;
    }

    params._response_column = "IsDepDelayed";
    params._ignored_columns = new String[] { "DepTime", "ArrTime", "Cancelled", "CancellationCode", "Diverted", "CarrierDelay", "WeatherDelay", "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed" };

    NFSFileVec source = NFSFileVec.make(zip);
    airlines = ParseDataset.parse(Key.make(), source._key);

    // Swap each listed column for its categorical version and drop the original Vec.
    String[] categoricalColumns = { params._response_column, "UniqueCarrier", "Origin", "Dest" };
    for (String name : categoricalColumns) {
      Vec original = airlines.remove(name);
      Vec categorical = original.toCategoricalVec();
      airlines.add(name, categorical);
      original.remove();
    }
    DKV.put(airlines);

    // Note: the first split (used for training) is stored under "test.hex",
    // the second (used for validation) under "train.hex".
    double[] ratios = ard(0.5, 0.5);
    Key[] splitKeys = aro(Key.make("test.hex"), Key.make("train.hex"));
    parts = ShuffleSplitFrame.shuffleSplitFrame(airlines, splitKeys, ratios, 42);

    params._backend = getBackend();
    params._train = splitKeys[0];
    params._valid = splitKeys[1];

    DeepWater builder = new DeepWater(params);
    model = builder.trainModel().get();

    ModelMetricsBinomial validation = (ModelMetricsBinomial) (model._output._validation_metrics);
    Assert.assertTrue(validation.auc() > 0.65);
  } finally {
    if (airlines != null) {
      airlines.remove();
    }
    if (model != null) {
      model.remove();
    }
    if (parts != null) {
      for (Frame part : parts) {
        part.remove();
      }
    }
  }
}
Aggregations