Use of hex.FrameSplitter in project h2o-3 by h2oai: class GBMMissingTest, method run().
@Test
public void run() {
  long seed = 1234;
  GBMModel mymodel = null;
  Frame train = null;
  Frame test = null;
  Frame data = null;
  GBMModel.GBMParameters p;
  Log.info("");
  Log.info("STARTING.");
  Log.info("Using seed " + seed);
  StringBuilder sb = new StringBuilder();
  double sumerr = 0;
  Map<Double, Double> map = new TreeMap<>();
  for (double missing_fraction : new double[] { 0, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 }) {
    double err = 0;
    try {
      Scope.enter();
      NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/junit/weather.csv");
      data = ParseDataset.parse(Key.make("data.hex"), nfs._key);
      Log.info("FrameSplitting");
      // Create holdout test data on clean data (before adding missing values)
      FrameSplitter fs = new FrameSplitter(data, new double[] { 0.75f }, generateNumKeys(data._key, 2), null);
      //.join();
      H2O.submitTask(fs);
      Frame[] train_test = fs.getResult();
      train = train_test[0];
      test = train_test[1];
      Log.info("Done...");
      // add missing values to the training data (excluding the response)
      if (missing_fraction > 0) {
        Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
        //exclude the response
        frtmp.remove(frtmp.numCols() - 1);
        //need to put the frame (to be modified) into DKV for MissingInserter to pick up
        DKV.put(frtmp._key, frtmp);
        FrameUtils.MissingInserter j = new FrameUtils.MissingInserter(frtmp._key, seed, missing_fraction);
        //MissingInserter is non-blocking, must block here explicitly
        j.execImpl().get();
        //Delete the frame header (not the data)
        DKV.remove(frtmp._key);
      }
      // Build a regularized GBM model with polluted training data, score on clean validation set
      p = new GBMModel.GBMParameters();
      p._train = train._key;
      p._valid = test._key;
      p._response_column = train._names[train.numCols() - 1];
      //only for weather data
      p._ignored_columns = new String[] { train._names[1], train._names[22] };
      p._seed = seed;
      // Convert response to categorical
      int ri = train.numCols() - 1;
      int ci = test.find(p._response_column);
      Scope.track(train.replace(ri, train.vecs()[ri].toCategoricalVec()));
      Scope.track(test.replace(ci, test.vecs()[ci].toCategoricalVec()));
      DKV.put(train);
      DKV.put(test);
      GBM gbm = new GBM(p);
      Log.info("Starting with " + missing_fraction * 100 + "% missing values added.");
      mymodel = gbm.trainModel().get();
      // Extract the scoring on validation set from the model
      err = ((ModelMetricsBinomial) mymodel._output._validation_metrics).logloss();
      Frame train_preds = mymodel.score(train);
      Assert.assertTrue(mymodel.testJavaScoring(train, train_preds, 1e-15));
      train_preds.remove();
      Log.info("Missing " + missing_fraction * 100 + "% -> logloss: " + err);
    } catch (Throwable t) {
      t.printStackTrace();
      err = 100;
    } finally {
      Scope.exit();
      // cleanup
      if (mymodel != null) {
        mymodel.delete();
      }
      if (train != null)
        train.delete();
      if (test != null)
        test.delete();
      if (data != null)
        data.delete();
    }
    map.put(missing_fraction, err);
    sumerr += err;
  }
  sb.append("missing fraction --> Error\n");
  for (String s : Arrays.toString(map.entrySet().toArray()).split(","))
    sb.append(s.replace("=", " --> ")).append("\n");
  sb.append('\n');
  sb.append("Sum Err: ").append(sumerr).append("\n");
  Log.info(sb.toString());
}
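Stripped of the model-building details, the FrameSplitter usage this test (and the DeepLearningMissingTest below) relies on is a short, repeatable pattern. The following is a minimal sketch, not code from the repository; it assumes the same imports and the same 75/25 holdout split as the test above, and it relies on getResult() blocking until the split task finishes, which is why the test can submit the task without joining it:

  // Minimal sketch of the h2o-3 holdout-split pattern used above (hypothetical, not repository code).
  FrameSplitter fs = new FrameSplitter(data, new double[] { 0.75 }, generateNumKeys(data._key, 2), null);
  H2O.submitTask(fs);               // hand the split task to H2O's task pool
  Frame[] splits = fs.getResult();  // waits for completion, then returns the split frames
  Frame train = splits[0];          // first ~75% of the rows
  Frame test = splits[1];           // remaining ~25%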
Use of hex.FrameSplitter in project h2o-2 by h2oai: class Expr2Test, method rbindTest().
@Test
public void rbindTest() {
  Key dest1 = Key.make("f1");
  float[] ratios = arf(0.5f);
  Frame[] splits = null;
  File file1 = TestUtil.find_test_file("smalldata/tnc3_10.csv");
  //File file = TestUtil.find_test_file("smalldata/iris/iris_wheader.csv");
  //File file = TestUtil.find_test_file("smalldata/cars.csv");
  Key fkey1 = NFSFileVec.make(file1);
  Frame f = ParseDataset2.parse(dest1, new Key[] { fkey1 });
  FrameSplitter fs = new FrameSplitter(f, ratios);
  H2O.submitTask(fs).join();
  splits = fs.getResult();
  Frame rbinded_frame;
  Env ev = Exec2.exec("rbind(" + splits[0]._key + "," + splits[1]._key + ")");
  try {
    rbinded_frame = ev.popAry();
  } finally {
    if (ev != null)
      ev.remove_and_unlock();
  }
  assertEquals(rbinded_frame.numRows(), f.numRows());
  rbinded_frame.delete();
  Lockable.delete(dest1);
  for (Frame s : splits)
    if (s != null)
      s.delete();
}
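The h2o-2 API drives the splitter slightly differently: the two-argument constructor chooses destination keys itself, and the test blocks by joining the submitted task. A minimal sketch of that variant, assuming the same test helpers (arf, the parsed frame f) used above; this is a hypothetical condensation, not repository code:

  // Hypothetical condensation of the h2o-2 pattern from rbindTest above.
  FrameSplitter fs = new FrameSplitter(f, arf(0.5f));  // 50/50 split, destination keys chosen by the splitter
  H2O.submitTask(fs).join();                           // block until both splits are materialized
  Frame[] splits = fs.getResult();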
Use of hex.FrameSplitter in project h2o-3 by h2oai: class DeepLearningMissingTest, method run().
@Test
public void run() {
  long seed = 1234;
  DeepLearningModel mymodel = null;
  Frame train = null;
  Frame test = null;
  Frame data = null;
  DeepLearningParameters p;
  Log.info("");
  Log.info("STARTING.");
  Log.info("Using seed " + seed);
  Map<DeepLearningParameters.MissingValuesHandling, Double> sumErr = new TreeMap<>();
  StringBuilder sb = new StringBuilder();
  for (DeepLearningParameters.MissingValuesHandling mvh : new DeepLearningParameters.MissingValuesHandling[] {
      DeepLearningParameters.MissingValuesHandling.MeanImputation,
      DeepLearningParameters.MissingValuesHandling.Skip }) {
    double sumloss = 0;
    Map<Double, Double> map = new TreeMap<>();
    for (double missing_fraction : new double[] { 0, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 }) {
      double loss = 0;
      try {
        Scope.enter();
        NFSFileVec nfs = NFSFileVec.make("smalldata/junit/weather.csv");
        data = ParseDataset.parse(Key.make("data.hex"), nfs._key);
        Log.info("FrameSplitting");
        // Create holdout test data on clean data (before adding missing values)
        FrameSplitter fs = new FrameSplitter(data, new double[] { 0.75f }, generateNumKeys(data._key, 2), null);
        //.join();
        H2O.submitTask(fs);
        Frame[] train_test = fs.getResult();
        train = train_test[0];
        test = train_test[1];
        Log.info("Done...");
        // add missing values to the training data (excluding the response)
        if (missing_fraction > 0) {
          Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
          //exclude the response
          frtmp.remove(frtmp.numCols() - 1);
          //need to put the frame (to be modified) into DKV for MissingInserter to pick up
          DKV.put(frtmp._key, frtmp);
          FrameUtils.MissingInserter j = new FrameUtils.MissingInserter(frtmp._key, seed, missing_fraction);
          //MissingInserter is non-blocking, must block here explicitly
          j.execImpl().get();
          //Delete the frame header (not the data)
          DKV.remove(frtmp._key);
        }
        // Build a regularized DL model with polluted training data, score on clean validation set
        p = new DeepLearningParameters();
        p._train = train._key;
        p._valid = test._key;
        p._response_column = train._names[train.numCols() - 1];
        //only for weather data
        p._ignored_columns = new String[] { train._names[1], train._names[22] };
        p._missing_values_handling = mvh;
        // DeepLearningParameters.Loss.ModifiedHuber;
        p._loss = DeepLearningParameters.Loss.CrossEntropy;
        p._activation = DeepLearningParameters.Activation.Rectifier;
        p._hidden = new int[] { 50, 50 };
        p._l1 = 1e-5;
        p._input_dropout_ratio = 0.2;
        p._epochs = 3;
        p._reproducible = true;
        p._seed = seed;
        p._elastic_averaging = false;
        // Convert response to categorical
        int ri = train.numCols() - 1;
        int ci = test.find(p._response_column);
        Scope.track(train.replace(ri, train.vecs()[ri].toCategoricalVec()));
        Scope.track(test.replace(ci, test.vecs()[ci].toCategoricalVec()));
        DKV.put(train);
        DKV.put(test);
        DeepLearning dl = new DeepLearning(p);
        Log.info("Starting with " + missing_fraction * 100 + "% missing values added.");
        mymodel = dl.trainModel().get();
        // Extract the scoring on validation set from the model
        loss = mymodel.loss();
        Log.info("Missing " + missing_fraction * 100 + "% -> logloss: " + loss);
      } catch (Throwable t) {
        t.printStackTrace();
        loss = 100;
      } finally {
        Scope.exit();
        // cleanup
        if (mymodel != null) {
          mymodel.delete();
        }
        if (train != null)
          train.delete();
        if (test != null)
          test.delete();
        if (data != null)
          data.delete();
      }
      map.put(missing_fraction, loss);
      sumloss += loss;
    }
    sb.append("\nMethod: ").append(mvh.toString()).append("\n");
    sb.append("missing fraction --> loss\n");
    for (String s : Arrays.toString(map.entrySet().toArray()).split(","))
      sb.append(s.replace("=", " --> ")).append("\n");
    sb.append('\n');
    sb.append("sum loss: ").append(sumloss).append("\n");
    sumErr.put(mvh, sumloss);
  }
  Log.info(sb.toString());
  Assert.assertEquals(405.5017, sumErr.get(DeepLearningParameters.MissingValuesHandling.Skip), 1e-2);
  Assert.assertEquals(3.914915, sumErr.get(DeepLearningParameters.MissingValuesHandling.MeanImputation), 1e-3);
}
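Both missing-value tests wrap the same FrameUtils.MissingInserter sequence around the training split. Isolated from the rest of the test, the step looks like the sketch below: a hypothetical distillation with a fixed 25% missing fraction; note that the temporary Frame shares its Vecs with train, which is why only the frame header is removed afterwards:

  // Hypothetical distillation of the MissingInserter step used in both tests above.
  Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
  frtmp.remove(frtmp.numCols() - 1);   // drop the response column so it stays clean
  DKV.put(frtmp._key, frtmp);          // the inserter looks the frame up in the DKV
  new FrameUtils.MissingInserter(frtmp._key, seed, 0.25).execImpl().get();  // execImpl() is non-blocking; get() waits
  DKV.remove(frtmp._key);              // delete the frame header only, not the shared data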
Use of hex.FrameSplitter in project h2o-3 by h2oai: class DeepWaterAbstractIntegrationTest, method testCheckpointOverwriteWithBestModel2().
// Check that the restarted model honors the previous model as a best model so far
@Test
public void testCheckpointOverwriteWithBestModel2() {
  Frame tfr = null;
  DeepWaterModel dl = null;
  DeepWaterModel dl2 = null;
  Frame train = null, valid = null;
  try {
    tfr = parse_test_file("./smalldata/iris/iris.csv");
    FrameSplitter fs = new FrameSplitter(tfr, new double[] { 0.8 }, new Key[] { Key.make("train"), Key.make("valid") }, null);
    fs.compute2();
    train = fs.getResult()[0];
    valid = fs.getResult()[1];
    DeepWaterParameters parms = new DeepWaterParameters();
    parms._backend = getBackend();
    parms._train = train._key;
    parms._valid = valid._key;
    parms._epochs = 10;
    parms._response_column = "C5";
    parms._hidden = new int[] { 50, 50 };
    parms._seed = 0xdecaf;
    parms._train_samples_per_iteration = 0;
    parms._score_duty_cycle = 1;
    parms._score_interval = 0;
    parms._stopping_rounds = 0;
    parms._overwrite_with_best_model = true;
    dl = new DeepWater(parms).trainModel().get();
    double ll1 = ((ModelMetricsMultinomial) dl._output._validation_metrics).logloss();
    DeepWaterParameters parms2 = (DeepWaterParameters) parms.clone();
    parms2._epochs = 20;
    parms2._checkpoint = dl._key;
    dl2 = new DeepWater(parms2).trainModel().get();
    double ll2 = ((ModelMetricsMultinomial) dl2._output._validation_metrics).logloss();
    Assert.assertTrue(ll2 <= ll1);
  } finally {
    if (tfr != null)
      tfr.delete();
    if (dl != null)
      dl.delete();
    if (dl2 != null)
      dl2.delete();
    if (train != null)
      train.delete();
    if (valid != null)
      valid.delete();
  }
}
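Unlike the earlier tests, this one runs the splitter by calling fs.compute2() directly on the calling thread instead of submitting the task to H2O's pool. Both styles appear across these examples; the short contrast below is a hypothetical sketch using only the calls seen in the tests above:

  // Style used in this DeepWater test: run the split work on the caller's thread.
  FrameSplitter fs = new FrameSplitter(tfr, new double[] { 0.8 },
      new Key[] { Key.make("train"), Key.make("valid") }, null);
  fs.compute2();
  Frame[] parts = fs.getResult();
  // Style used in the GBM/DeepLearning tests: submit to H2O's task pool instead.
  // H2O.submitTask(fs);
  // Frame[] parts = fs.getResult();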
Use of hex.FrameSplitter in project h2o-2 by h2oai: class FrameSplitPage, method execImpl().
// Run the function
@Override
protected void execImpl() {
  Frame frame = source;
  if (shuffle) {
    // FIXME: switch to global shuffle
    frame = MRUtils.shuffleFramePerChunk(Utils.generateShuffledKey(frame._key), frame, seed);
    // save frame to DKV
    frame.delete_and_lock(null).unlock(null);
    // delete frame on the end
    gtrash(frame);
  }
  FrameSplitter fs = new FrameSplitter(frame, ratios);
  H2O.submitTask(fs);
  Frame[] splits = fs.getResult();
  split_keys = new Key[splits.length];
  split_rows = new long[splits.length];
  float rsum = Utils.sum(ratios);
  split_ratios = Arrays.copyOf(ratios, splits.length);
  split_ratios[splits.length - 1] = 1f - rsum;
  long sum = 0;
  for (int i = 0; i < splits.length; i++) {
    sum += splits[i].numRows();
    split_keys[i] = splits[i]._key;
    split_rows[i] = splits[i].numRows();
  }
  assert sum == source.numRows() : "Frame split produced wrong number of rows: nrows(source) != sum(nrows(splits))";
}
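The ratio bookkeeping at the end of execImpl relies on FrameSplitter returning one more split than there are ratios, with the last split receiving the remainder. A small worked example with hypothetical values makes the arithmetic concrete:

  // Hypothetical worked example of the split-ratio arithmetic above.
  float[] ratios = { 0.25f, 0.25f };               // caller asks for two 25% splits
  // FrameSplitter then produces three frames: 25%, 25%, and the remainder.
  float rsum = 0.25f + 0.25f;                      // Utils.sum(ratios) == 0.5f
  float[] split_ratios = Arrays.copyOf(ratios, 3); // { 0.25f, 0.25f, 0.0f }
  split_ratios[2] = 1f - rsum;                     // last split gets 1 - 0.5 = 0.5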