Usage of water.fvec.NFSFileVec in project h2o-3 by h2oai:
class DeepLearningSpiralsTest, method run.
/**
 * Trains a DeepLearning classifier on the two-spiral toy dataset and requires
 * a test classification error of at most 0.1, for each supported combination
 * of the sparse/col_major input-handling flags. Also verifies that generated
 * Java (POJO) scoring agrees with in-H2O scoring.
 */
@Test
public void run() {
    Scope.enter();
    NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/junit/two_spiral.csv");
    Frame frame = ParseDataset.parse(Key.make(), nfs._key);
    Log.info(frame);
    // Response is the last column of the parsed frame.
    int resp = frame.names().length - 1;
    for (boolean sparse : new boolean[] { true, false }) {
        for (boolean col_major : new boolean[] { false }) {
            // col_major only applies to the sparse code path; skip the invalid combo.
            if (!sparse && col_major)
                continue;
            Key model_id = Key.make();
            // build the model
            {
                DeepLearningParameters p = new DeepLearningParameters();
                p._epochs = 5000;
                p._hidden = new int[] { 100 };
                p._sparse = sparse;
                p._col_major = col_major;
                p._activation = DeepLearningParameters.Activation.Tanh;
                p._initial_weight_distribution = DeepLearningParameters.InitialWeightDistribution.Normal;
                // Large initial weights help the net break symmetry on this hard toy problem.
                p._initial_weight_scale = 2.5;
                p._loss = DeepLearningParameters.Loss.CrossEntropy;
                p._train = frame._key;
                p._response_column = frame.names()[resp];
                // Convert response to categorical
                Scope.track(frame.replace(resp, frame.vecs()[resp].toCategoricalVec()));
                DKV.put(frame);
                p._rho = 0.99;
                p._epsilon = 5e-3;
                //stop when reaching 0 classification error on training data
                p._classification_stop = 0;
                p._train_samples_per_iteration = 10000;
                p._stopping_rounds = 5;
                p._stopping_metric = ScoreKeeper.StoppingMetric.misclassification;
                p._score_each_iteration = true;
                // Deterministic training (with fixed seed) so the test is repeatable.
                p._reproducible = true;
                p._seed = 1234;
                new DeepLearning(p, model_id).trainModel().get();
            }
            // score and check result
            {
                DeepLearningModel mymodel = DKV.getGet(model_id);
                Frame pred = mymodel.score(frame);
                ModelMetricsBinomial mm = ModelMetricsBinomial.getFromDKV(mymodel, frame);
                // Classification error at the default threshold.
                double error = mm._auc.defaultErr();
                Log.info("Error: " + error);
                if (error > 0.1) {
                    Assert.fail("Test classification error is not <= 0.1, but " + error + ".");
                }
                // Generated Java scoring must match in-H2O predictions to 1e-6.
                Assert.assertTrue(mymodel.testJavaScoring(frame, pred, 1e-6));
                pred.delete();
                mymodel.delete();
            }
        }
    }
    frame.delete();
    Scope.exit();
}
Usage of water.fvec.NFSFileVec in project h2o-3 by h2oai:
class DeepLearningCheckpointReporting, method run.
/**
 * Trains a DeepLearning model, sleeps for a few seconds, then resumes training
 * from a checkpoint and validates the final model's scoring-history table:
 * timestamps lie within the outside timer window and increase monotonically,
 * reported training duration increases monotonically and does NOT include the
 * sleep, epoch/iteration counters are contiguous across the restart, and the
 * training speed stays within 50% of its pre-checkpoint value.
 */
@Test
public void run() {
    Scope.enter();
    Frame frame = null;
    try {
        NFSFileVec trainfv = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
        frame = ParseDataset.parse(Key.make(), trainfv._key);
        DeepLearningParameters p = new DeepLearningParameters();
        // populate model parameters
        p._train = frame._key;
        // last column is the response
        p._response_column = "CAPSULE";
        p._activation = DeepLearningParameters.Activation.Rectifier;
        p._epochs = 4;
        p._train_samples_per_iteration = -1;
        // Score after every iteration so the scoring history gets one row per epoch.
        p._score_duty_cycle = 1;
        p._score_interval = 0;
        p._overwrite_with_best_model = false;
        p._classification_stop = -1;
        p._seed = 1234;
        p._reproducible = true;
        // Convert the binary response 'CAPSULE' to categorical
        int ci = frame.find("CAPSULE");
        Scope.track(frame.replace(ci, frame.vecs()[ci].toCategoricalVec()));
        DKV.put(frame);
        long start = System.currentTimeMillis();
        //to avoid rounding issues with printed time stamp (1 second resolution)
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            // Restore the interrupt status instead of silently swallowing it.
            Thread.currentThread().interrupt();
        }
        DeepLearningModel model = new DeepLearning(p).trainModel().get();
        //seconds
        long sleepTime = 5;
        try {
            Thread.sleep(sleepTime * 1000);
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
        }
        // checkpoint restart after sleep
        DeepLearningParameters p2 = (DeepLearningParameters) p.clone();
        p2._checkpoint = model._key;
        p2._epochs *= 2;
        DeepLearningModel model2 = null;
        try {
            model2 = new DeepLearning(p2).trainModel().get();
            long end = System.currentTimeMillis();
            TwoDimTable table = model2._output._scoring_history;
            double priorDurationDouble = 0;
            long priorTimeStampLong = 0;
            DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
            for (int i = 0; i < table.getRowDim(); ++i) {
                // Check that timestamp is correct, and growing monotonically
                String timestamp = (String) table.get(i, 0);
                long timeStampLong = fmt.parseMillis(timestamp);
                Assert.assertTrue("Timestamp must be later than outside timer start", timeStampLong >= start);
                Assert.assertTrue("Timestamp must be earlier than outside timer end", timeStampLong <= end);
                Assert.assertTrue("Timestamp must increase", timeStampLong >= priorTimeStampLong);
                priorTimeStampLong = timeStampLong;
                // Check that duration is growing monotonically
                String duration = (String) table.get(i, 1);
                // Strip the " sec" suffix: cell is formatted as "x.xxxx sec".
                duration = duration.substring(0, duration.length() - 4);
                try {
                    double durationDouble = Double.parseDouble(duration);
                    Assert.assertTrue("Duration must be >= 0: " + durationDouble, durationDouble >= 0);
                    Assert.assertTrue("Duration must increase: " + priorDurationDouble + " -> " + durationDouble, durationDouble >= priorDurationDouble);
                    Assert.assertTrue("Duration cannot be more than outside timer delta", durationDouble <= (end - start) / 1e3);
                    priorDurationDouble = durationDouble;
                } catch (NumberFormatException ex) {
                    //skip - durations over a minute are not in plain-seconds format
                }
                // Check that epoch counting is good
                //1 epoch per step
                Assert.assertTrue("Epoch counter must be contiguous", (Double) table.get(i, 3) == i);
                //1 iteration per step
                Assert.assertTrue("Iteration counter must match epochs", (Integer) table.get(i, 4) == i);
            }
            try {
                // Check that duration doesn't see the sleep
                String durationBefore = (String) table.get((int) (p._epochs), 1);
                durationBefore = durationBefore.substring(0, durationBefore.length() - 4);
                String durationAfter = (String) table.get((int) (p._epochs + 1), 1);
                durationAfter = durationAfter.substring(0, durationAfter.length() - 4);
                double diff = Double.parseDouble(durationAfter) - Double.parseDouble(durationBefore);
                Assert.assertTrue("Duration must be smooth; actual " + diff + ", expected at most " + sleepTime + " (before=" + durationBefore + ", after=" + durationAfter + ")", diff < sleepTime + 1);
                // Check that time stamp does see the sleep
                String timeStampBefore = (String) table.get((int) (p._epochs), 0);
                long timeStampBeforeLong = fmt.parseMillis(timeStampBefore);
                String timeStampAfter = (String) table.get((int) (p._epochs + 1), 0);
                long timeStampAfterLong = fmt.parseMillis(timeStampAfter);
                Assert.assertTrue("Time stamp must experience a delay", timeStampAfterLong - timeStampBeforeLong >= (sleepTime - 1) * 1000);
                // Check that the training speed is similar before and after checkpoint restart
                String speedBefore = (String) table.get((int) (p._epochs), 2);
                // Strip the " obs/sec" style suffix (9 characters).
                speedBefore = speedBefore.substring(0, speedBefore.length() - 9);
                double speedBeforeDouble = Double.parseDouble(speedBefore);
                String speedAfter = (String) table.get((int) (p._epochs + 1), 2);
                speedAfter = speedAfter.substring(0, speedAfter.length() - 9);
                double speedAfterDouble = Double.parseDouble(speedAfter);
                //expect less than 50% change in speed
                Assert.assertTrue("Speed shouldn't change more than 50%", Math.abs(speedAfterDouble - speedBeforeDouble) / speedBeforeDouble < 0.5);
            } catch (NumberFormatException ex) {
                //skip runtimes > 1 minute (too hard to parse into seconds here...).
            }
        } finally {
            if (model != null)
                model.delete();
            if (model2 != null)
                model2.delete();
        }
    } finally {
        if (frame != null)
            frame.remove();
        Scope.exit();
    }
}
Usage of water.fvec.NFSFileVec in project h2o-3 by h2oai:
class DeepWaterAbstractIntegrationTest, method MNISTHinton.
/**
 * Trains Hinton's classic 1024-1024-2048 dropout network on MNIST and requires
 * a mean per-class validation error below 5%. The test is skipped when the
 * big data files are not available locally.
 */
@Test
public void MNISTHinton() {
    Frame tr = null;
    Frame va = null;
    DeepWaterModel m = null;
    try {
        DeepWaterParameters p = new DeepWaterParameters();
        File file = FileUtils.locateFile("bigdata/laptop/mnist/train.csv.gz");
        File valid = FileUtils.locateFile("bigdata/laptop/mnist/test.csv.gz");
        // Skip unless BOTH data sets are present (previously only 'file' was
        // checked, so a missing test set caused an NPE in NFSFileVec.make).
        if (file != null && valid != null) {
            p._response_column = "C785";
            NFSFileVec trainfv = NFSFileVec.make(file);
            tr = ParseDataset.parse(Key.make(), trainfv._key);
            NFSFileVec validfv = NFSFileVec.make(valid);
            va = ParseDataset.parse(Key.make(), validfv._key);
            // Convert the digit-label response column to categorical in both frames.
            for (String col : new String[] { p._response_column }) {
                Vec v = tr.remove(col);
                tr.add(col, v.toCategoricalVec());
                v.remove();
                v = va.remove(col);
                va.add(col, v.toCategoricalVec());
                v.remove();
            }
            DKV.put(tr);
            DKV.put(va);
            p._backend = getBackend();
            p._hidden = new int[] { 1024, 1024, 2048 };
            p._input_dropout_ratio = 0.1;
            p._hidden_dropout_ratios = new double[] { 0.5, 0.5, 0.5 };
            p._stopping_rounds = 0;
            p._learning_rate = 1e-3;
            p._mini_batch_size = 32;
            p._epochs = 20;
            p._train = tr._key;
            p._valid = va._key;
            DeepWater j = new DeepWater(p);
            m = j.trainModel().get();
            Assert.assertTrue(((ModelMetricsMultinomial) (m._output._validation_metrics)).mean_per_class_error() < 0.05);
        }
    } finally {
        // Always release H2O-managed resources, even on assertion failure.
        if (tr != null)
            tr.remove();
        if (va != null)
            va.remove();
        if (m != null)
            m.remove();
    }
}
Usage of water.fvec.NFSFileVec in project h2o-3 by h2oai:
class DeepWaterAbstractIntegrationTest, method MNISTLenet.
/**
 * Trains a LeNet convolutional network on MNIST (28x28 grayscale images) and
 * requires a mean per-class validation error below 5%. The test is skipped
 * when the big data files are not available locally.
 */
@Test
public void MNISTLenet() {
    Frame tr = null;
    Frame va = null;
    DeepWaterModel m = null;
    try {
        DeepWaterParameters p = new DeepWaterParameters();
        File file = FileUtils.locateFile("bigdata/laptop/mnist/train.csv.gz");
        File valid = FileUtils.locateFile("bigdata/laptop/mnist/test.csv.gz");
        // Skip unless BOTH data sets are present (previously only 'file' was
        // checked, so a missing test set caused an NPE in NFSFileVec.make).
        if (file != null && valid != null) {
            p._response_column = "C785";
            NFSFileVec trainfv = NFSFileVec.make(file);
            tr = ParseDataset.parse(Key.make(), trainfv._key);
            NFSFileVec validfv = NFSFileVec.make(valid);
            va = ParseDataset.parse(Key.make(), validfv._key);
            // Convert the digit-label response column to categorical in both frames.
            for (String col : new String[] { p._response_column }) {
                Vec v = tr.remove(col);
                tr.add(col, v.toCategoricalVec());
                v.remove();
                v = va.remove(col);
                va.add(col, v.toCategoricalVec());
                v.remove();
            }
            DKV.put(tr);
            DKV.put(va);
            p._backend = getBackend();
            p._train = tr._key;
            p._valid = va._key;
            p._image_shape = new int[] { 28, 28 };
            //to keep it 28x28
            p._ignore_const_cols = false;
            p._channels = 1;
            p._network = lenet;
            DeepWater j = new DeepWater(p);
            m = j.trainModel().get();
            Assert.assertTrue(((ModelMetricsMultinomial) (m._output._validation_metrics)).mean_per_class_error() < 0.05);
        }
    } finally {
        // Always release H2O-managed resources, even on assertion failure.
        if (tr != null)
            tr.remove();
        if (va != null)
            va.remove();
        if (m != null)
            m.remove();
    }
}
Usage of water.fvec.NFSFileVec in project h2o-3 by h2oai:
class DeepWaterAbstractIntegrationTest, method checkpointReporting.
/**
 * Trains a DeepWater model, sleeps for a few seconds, then resumes training
 * from a checkpoint and validates the final model's scoring-history table:
 * timestamps lie within the outside timer window and increase monotonically,
 * reported training duration increases monotonically and does NOT include the
 * sleep, epoch/iteration counters are contiguous across the restart, and the
 * training speed stays within 50% of its pre-checkpoint value.
 */
@Test
public void checkpointReporting() {
    Scope.enter();
    Frame frame = null;
    try {
        File file = FileUtils.locateFile("smalldata/logreg/prostate.csv");
        NFSFileVec trainfv = NFSFileVec.make(file);
        frame = ParseDataset.parse(Key.make(), trainfv._key);
        DeepWaterParameters p = new DeepWaterParameters();
        // populate model parameters
        p._backend = getBackend();
        p._train = frame._key;
        // last column is the response
        p._response_column = "CAPSULE";
        p._activation = DeepWaterParameters.Activation.Rectifier;
        p._epochs = 4;
        p._train_samples_per_iteration = -1;
        p._mini_batch_size = 1;
        // Score after every iteration so the scoring history gets one row per epoch.
        p._score_duty_cycle = 1;
        p._score_interval = 0;
        p._overwrite_with_best_model = false;
        p._seed = 1234;
        // Convert the binary response 'CAPSULE' to categorical
        int ci = frame.find("CAPSULE");
        Scope.track(frame.replace(ci, frame.vecs()[ci].toCategoricalVec()));
        DKV.put(frame);
        long start = System.currentTimeMillis();
        //to avoid rounding issues with printed time stamp (1 second resolution)
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            // Restore the interrupt status instead of silently swallowing it.
            Thread.currentThread().interrupt();
        }
        DeepWaterModel model = new DeepWater(p).trainModel().get();
        //seconds
        long sleepTime = 5;
        try {
            Thread.sleep(sleepTime * 1000);
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
        }
        // checkpoint restart after sleep
        DeepWaterParameters p2 = (DeepWaterParameters) p.clone();
        p2._checkpoint = model._key;
        p2._epochs *= 2;
        DeepWaterModel model2 = null;
        try {
            model2 = new DeepWater(p2).trainModel().get();
            long end = System.currentTimeMillis();
            TwoDimTable table = model2._output._scoring_history;
            double priorDurationDouble = 0;
            long priorTimeStampLong = 0;
            DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
            for (int i = 0; i < table.getRowDim(); ++i) {
                // Check that timestamp is correct, and growing monotonically
                String timestamp = (String) table.get(i, 0);
                long timeStampLong = fmt.parseMillis(timestamp);
                Assert.assertTrue("Timestamp must be later than outside timer start", timeStampLong >= start);
                Assert.assertTrue("Timestamp must be earlier than outside timer end", timeStampLong <= end);
                Assert.assertTrue("Timestamp must increase", timeStampLong >= priorTimeStampLong);
                priorTimeStampLong = timeStampLong;
                // Check that duration is growing monotonically
                String duration = (String) table.get(i, 1);
                // Strip the " sec" suffix: cell is formatted as "x.xxxx sec".
                duration = duration.substring(0, duration.length() - 4);
                try {
                    double durationDouble = Double.parseDouble(duration);
                    Assert.assertTrue("Duration must be >= 0: " + durationDouble, durationDouble >= 0);
                    Assert.assertTrue("Duration must increase: " + priorDurationDouble + " -> " + durationDouble, durationDouble >= priorDurationDouble);
                    Assert.assertTrue("Duration cannot be more than outside timer delta", durationDouble <= (end - start) / 1e3);
                    priorDurationDouble = durationDouble;
                } catch (NumberFormatException ex) {
                    //skip - durations over a minute are not in plain-seconds format
                }
                // Check that epoch counting is good
                //1 epoch per step
                Assert.assertTrue("Epoch counter must be contiguous", (Double) table.get(i, 3) == i);
                //1 iteration per step
                Assert.assertTrue("Iteration counter must match epochs", (Integer) table.get(i, 4) == i);
            }
            try {
                // Check that duration doesn't see the sleep
                String durationBefore = (String) table.get((int) (p._epochs), 1);
                durationBefore = durationBefore.substring(0, durationBefore.length() - 4);
                String durationAfter = (String) table.get((int) (p._epochs + 1), 1);
                durationAfter = durationAfter.substring(0, durationAfter.length() - 4);
                double diff = Double.parseDouble(durationAfter) - Double.parseDouble(durationBefore);
                // Include the actual values in the message, matching the DeepLearning variant of this test.
                Assert.assertTrue("Duration must be smooth; actual " + diff + ", expected at most " + sleepTime + " (before=" + durationBefore + ", after=" + durationAfter + ")", diff < sleepTime + 1);
                // Check that time stamp does see the sleep
                String timeStampBefore = (String) table.get((int) (p._epochs), 0);
                long timeStampBeforeLong = fmt.parseMillis(timeStampBefore);
                String timeStampAfter = (String) table.get((int) (p._epochs + 1), 0);
                long timeStampAfterLong = fmt.parseMillis(timeStampAfter);
                Assert.assertTrue("Time stamp must experience a delay", timeStampAfterLong - timeStampBeforeLong >= (sleepTime - 1) * 1000);
                // Check that the training speed is similar before and after checkpoint restart
                String speedBefore = (String) table.get((int) (p._epochs), 2);
                // Strip the " obs/sec" style suffix (9 characters).
                speedBefore = speedBefore.substring(0, speedBefore.length() - 9);
                double speedBeforeDouble = Double.parseDouble(speedBefore);
                String speedAfter = (String) table.get((int) (p._epochs + 1), 2);
                speedAfter = speedAfter.substring(0, speedAfter.length() - 9);
                double speedAfterDouble = Double.parseDouble(speedAfter);
                //expect less than 50% change in speed
                Assert.assertTrue("Speed shouldn't change more than 50%", Math.abs(speedAfterDouble - speedBeforeDouble) / speedBeforeDouble < 0.5);
            } catch (NumberFormatException ex) {
                //skip runtimes > 1 minute (too hard to parse into seconds here...).
            }
        } finally {
            if (model != null)
                model.delete();
            if (model2 != null)
                model2.delete();
        }
    } finally {
        if (frame != null)
            frame.remove();
        Scope.exit();
    }
}
Aggregations