use of hex.deeplearning.DeepLearning in project h2o-2 by h2oai.
the class DeepLearningIrisTest method runFraction.
/**
 * Cross-checks H2O DeepLearning against DeepLearningMLPReference on the data set found at PATH.
 *
 * For a grid of activation functions, loss functions, initial weight distributions and
 * (partly random) hyper-parameters, both implementations are given identical initial weights
 * and are trained for the same number of epochs. The test then asserts that the trained
 * weights/biases, the predicted labels and the training/test errors agree.
 *
 * @param fraction selection threshold: a parameter combination is skipped whenever
 *                 {@code fraction < rng.nextFloat()}, so values >= 1 run every combination
 *                 and smaller values run a random subset
 */
void runFraction(float fraction) {
long seed0 = 0xDECAF;
// counts every parameter combination visited, including the ones skipped via 'fraction'
int num_runs = 0;
// each repeat draws a new set of random hyper-parameters from seed0 + repeat
for (int repeat = 0; repeat < 5; ++repeat) {
// Testing different things
// Note: Microsoft reference implementation is only for Tanh + MSE, rectifier and MCE are implemented by 0xdata (trivial).
// Note: Initial weight distributions are copied, but what is tested is the stability behavior.
DeepLearning.Activation[] activations = { DeepLearning.Activation.Tanh, DeepLearning.Activation.Rectifier };
DeepLearning.Loss[] losses = { DeepLearning.Loss.MeanSquare, DeepLearning.Loss.CrossEntropy };
DeepLearning.InitialWeightDistribution[] dists = { DeepLearning.InitialWeightDistribution.Normal, DeepLearning.InitialWeightDistribution.Uniform, DeepLearning.InitialWeightDistribution.UniformAdaptive };
final long seed = seed0 + repeat;
Random rng = new Random(seed);
// NOTE: the order of the rng draws below determines the generated grid - do not reorder
double[] initial_weight_scales = { 1e-4 + rng.nextDouble() };
double[] holdout_ratios = { 0.1 + rng.nextDouble() * 0.8 };
double[] momenta = { rng.nextDouble() * 0.99 };
int[] hiddens = { 1, 2 + rng.nextInt(50) };
int[] epochs = { 1, 2 + rng.nextInt(50) };
double[] rates = { 0.01, 1e-5 + rng.nextDouble() * .1 };
for (DeepLearning.Activation activation : activations) {
for (DeepLearning.Loss loss : losses) {
for (DeepLearning.InitialWeightDistribution dist : dists) {
for (double scale : initial_weight_scales) {
for (double holdout_ratio : holdout_ratios) {
for (double momentum : momenta) {
for (int hidden : hiddens) {
for (int epoch : epochs) {
for (double rate : rates) {
for (boolean sparse : new boolean[] { true, false }) {
// only row-major is exercised; the col_major loop is kept as a placeholder
for (boolean col_major : new boolean[] { false }) {
// everything created for this combination is declared here so the finally block can clean it up
DeepLearningModel mymodel = null;
Frame frame = null;
Frame fr = null;
DeepLearning p = null;
Frame trainPredict = null;
Frame testPredict = null;
try {
// col_major is only combined with sparse (cannot trigger while col_major is always false)
if (col_major && !sparse)
continue;
num_runs++;
// random sub-sampling of the grid; the 'continue' still runs the finally block below
if (fraction < rng.nextFloat())
continue;
Log.info("");
Log.info("STARTING.");
Log.info("Running with " + activation.name() + " activation function and " + loss.name() + " loss function.");
Log.info("Initialization with " + dist.name() + " distribution and " + scale + " scale, holdout ratio " + holdout_ratio);
// 'hidden' is used below as the size of the single hidden layer (p.hidden = { hidden })
Log.info("Using " + hidden + " hidden layers and momentum: " + momentum);
Log.info("Using seed " + seed);
Key file = NFSFileVec.make(find_test_file(PATH));
frame = ParseDataset2.parse(Key.make("iris_nn2"), new Key[] { file });
Random rand;
int trial = 0;
FrameTask.DataInfo dinfo;
// Build the train/test split; repeated until the training response has at least 3 classes.
// NOTE(review): rand is re-created from the same 'seed' on every trial, so a retry looks like it
// would reproduce the same shuffle - confirm that the loop can actually terminate after a retry.
do {
Log.info("Trial #" + ++trial);
// drop the frames of a previous trial (or of a previous parameter combination)
if (_train != null)
_train.delete();
if (_test != null)
_test.delete();
if (fr != null)
fr.delete();
rand = Utils.getDeterRNG(seed);
// copy the parsed frame into a dense row-major array so it can be shuffled in memory
double[][] rows = new double[(int) frame.numRows()][frame.numCols()];
String[] names = new String[frame.numCols()];
for (int c = 0; c < frame.numCols(); c++) {
names[c] = "ColumnName" + c;
for (int r = 0; r < frame.numRows(); r++) rows[r][c] = frame.vecs()[c].at(r);
}
// Fisher-Yates shuffle of the rows, driven by the deterministic RNG
for (int i = rows.length - 1; i >= 0; i--) {
int shuffle = rand.nextInt(i + 1);
double[] row = rows[shuffle];
rows[shuffle] = rows[i];
rows[i] = row;
}
// first 'limit' shuffled rows go to _train, the remaining rows to _test
// (assumes Utils.subarray takes (array, offset, length) - TODO confirm)
int limit = (int) (frame.numRows() * holdout_ratio);
_train = frame(names, Utils.subarray(rows, 0, limit));
_test = frame(names, Utils.subarray(rows, limit, (int) frame.numRows() - limit));
p = new DeepLearning();
p.source = _train;
p.response = _train.lastVec();
p.ignored_cols = null;
p.ignore_const_cols = true;
fr = FrameTask.DataInfo.prepareFrame(p.source, p.response, p.ignored_cols, true, p.ignore_const_cols);
dinfo = new FrameTask.DataInfo(fr, 1, true, false, FrameTask.DataInfo.TransformType.STANDARDIZE);
} while (// must have all output classes in training data (since that's what the reference implementation has hardcoded)
dinfo._adaptedFrame.lastVec().domain().length < 3);
// use the same seed for the reference implementation
DeepLearningMLPReference ref = new DeepLearningMLPReference();
ref.init(activation, Utils.getDeterRNG(seed), holdout_ratio, hidden);
p.seed = seed;
p.hidden = new int[] { hidden };
// plain SGD with a fixed learning rate (adaptive rate switched off)
p.adaptive_rate = false;
p.rho = 0;
p.epsilon = 0;
//adapt to (1-m) correction that's done inside (only for constant momentum!)
p.rate = rate / (1 - momentum);
p.activation = activation;
p.max_w2 = Float.POSITIVE_INFINITY;
p.epochs = epoch;
p.input_dropout_ratio = 0;
//do not change - not implemented in reference
p.rate_annealing = 0;
p.l1 = 0;
p.loss = loss;
p.l2 = 0;
//reference only supports constant momentum
p.momentum_stable = momentum;
//do not change - not implemented in reference
p.momentum_start = p.momentum_stable;
//do not change - not implemented in reference
p.momentum_ramp = 0;
p.initial_weight_distribution = dist;
p.initial_weight_scale = scale;
p.classification = true;
p.diagnostics = true;
p.validation = null;
p.quiet_mode = true;
//to be the same as reference
p.fast_mode = false;
// p.fast_mode = true; //to be the same as old NeuralNet code
//to be the same as reference
p.nesterov_accelerated_gradient = false;
// p.nesterov_accelerated_gradient = true; //to be the same as old NeuralNet code
//sync once per period
p.train_samples_per_iteration = 0;
p.ignore_const_cols = false;
p.shuffle_training_data = false;
//don't stop early -> need to compare against reference, which doesn't stop either
p.classification_stop = -1;
//keep just 1 chunk for reproducibility
p.force_load_balance = false;
// keep the final model rather than the best one seen, so it can be compared against the reference
p.override_with_best_model = false;
p.replicate_training_data = false;
p.single_node_mode = true;
p.sparse = sparse;
p.col_major = col_major;
//randomize weights, but don't start training yet
mymodel = p.initModel();
Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(mymodel.model_info());
// use the same random weights for the reference implementation
// neurons[1]: hidden layer -> copied into the reference's input-to-hidden weights and hidden biases
Neurons l = neurons[1];
for (int o = 0; o < l._a.size(); o++) {
for (int i = 0; i < l._previous._a.size(); i++) {
// System.out.println("initial weight[" + o + "]=" + l._w[o * l._previous._a.length + i]);
ref._nn.ihWeights[i][o] = l._w.get(o, i);
}
ref._nn.hBiases[o] = l._b.get(o);
// System.out.println("initial bias[" + o + "]=" + l._b[o]);
}
// neurons[2]: output layer -> copied into the reference's hidden-to-output weights and output biases
l = neurons[2];
for (int o = 0; o < l._a.size(); o++) {
for (int i = 0; i < l._previous._a.size(); i++) {
// System.out.println("initial weight[" + o + "]=" + l._w[o * l._previous._a.length + i]);
ref._nn.hoWeights[i][o] = l._w.get(o, i);
}
ref._nn.oBiases[o] = l._b.get(o);
// System.out.println("initial bias[" + o + "]=" + l._b[o]);
}
// Train the Reference
// (the reference gets the uncorrected 'rate'; H2O got rate / (1 - momentum) above)
ref.train((int) p.epochs, rate, p.momentum_stable, loss);
// Train H2O
mymodel = p.trainModel(mymodel);
// H2O must have processed exactly 'epoch' passes over the training rows
Assert.assertTrue(mymodel.model_info().get_processed_total() == epoch * fr.numRows());
/**
 * Tolerances (should ideally be super tight -> expect the same double/float precision math inside both algos)
 */
final double abseps = 1e-4;
final double releps = 1e-4;
/**
 * Compare weights and biases in hidden layer
 */
//link the weights to the neurons, for easy access
neurons = DeepLearningTask.makeNeuronsForTesting(mymodel.model_info());
l = neurons[1];
for (int o = 0; o < l._a.size(); o++) {
for (int i = 0; i < l._previous._a.size(); i++) {
double a = ref._nn.ihWeights[i][o];
double b = l._w.get(o, i);
compareVal(a, b, abseps, releps);
// System.out.println("weight[" + o + "]=" + b);
}
double ba = ref._nn.hBiases[o];
double bb = l._b.get(o);
compareVal(ba, bb, abseps, releps);
}
Log.info("Weights and biases for hidden layer: PASS");
/**
 * Compare weights and biases for output layer
 */
l = neurons[2];
for (int o = 0; o < l._a.size(); o++) {
for (int i = 0; i < l._previous._a.size(); i++) {
double a = ref._nn.hoWeights[i][o];
double b = l._w.get(o, i);
compareVal(a, b, abseps, releps);
}
double ba = ref._nn.oBiases[o];
double bb = l._b.get(o);
compareVal(ba, bb, abseps, releps);
}
Log.info("Weights and biases for output layer: PASS");
/**
 * Compare predictions
 * Note: Reference and H2O each do their internal data normalization,
 * so we must use their "own" test data, which is assumed to be created correctly.
 */
// H2O predictions
//[0] is label, [1]...[4] are the probabilities
Frame fpreds = mymodel.score(_test);
try {
for (int i = 0; i < _test.numRows(); ++i) {
// Reference predictions
// feed the reference with the first neurons[0]._a.size() values of its own test row
double[] xValues = new double[neurons[0]._a.size()];
System.arraycopy(ref._testData[i], 0, xValues, 0, xValues.length);
double[] ref_preds = ref._nn.ComputeOutputs(xValues);
// find the label
// do the same as H2O here (compare float values and break ties based on row number)
float[] preds = new float[ref_preds.length + 1];
for (int j = 0; j < ref_preds.length; ++j) preds[j + 1] = (float) ref_preds[j];
preds[0] = getPrediction(preds, i);
// compare predicted label
Assert.assertTrue(preds[0] == (int) fpreds.vecs()[0].at(i));
// // compare predicted probabilities
// for (int j=0; j<ref_preds.length; ++j) {
// compareVal((float)(ref_preds[j]), fpreds.vecs()[1+j].at(i), abseps, releps);
// }
}
} finally {
if (fpreds != null)
fpreds.delete();
}
Log.info("Predicted values: PASS");
/**
 * Compare (self-reported) scoring
 */
// NOTE(review): the values returned by ref._nn.Accuracy(...) are compared against H2O's calcError(...)
// results below, i.e. they are treated as error rates despite the method name - confirm
final double trainErr = ref._nn.Accuracy(ref._trainData);
final double testErr = ref._nn.Accuracy(ref._testData);
trainPredict = mymodel.score(_train, false);
final double myTrainErr = mymodel.calcError(_train, _train.lastVec(), trainPredict, trainPredict, "Final training error:", true, p.max_confusion_matrix_size, new water.api.ConfusionMatrix(), null, null);
testPredict = mymodel.score(_test, false);
final double myTestErr = mymodel.calcError(_test, _test.lastVec(), testPredict, testPredict, "Final testing error:", true, p.max_confusion_matrix_size, new water.api.ConfusionMatrix(), null, null);
Log.info("H2O training error : " + myTrainErr * 100 + "%, test error: " + myTestErr * 100 + "%");
Log.info("REF training error : " + trainErr * 100 + "%, test error: " + testErr * 100 + "%");
compareVal(trainErr, myTrainErr, abseps, releps);
compareVal(testErr, myTestErr, abseps, releps);
Log.info("Scoring: PASS");
// get the actual best error on training data
float best_err = Float.MAX_VALUE;
for (DeepLearningModel.Errors err : mymodel.scoring_history()) {
//multi-class classification
best_err = Math.min(best_err, (float) err.train_err);
}
Log.info("Actual best error : " + best_err * 100 + "%.");
// this is enabled by default
// (but p.override_with_best_model is set to false above, so this branch is not taken in this test)
if (p.override_with_best_model) {
Frame bestPredict = null;
try {
bestPredict = mymodel.score(_train, false);
final double bestErr = mymodel.calcError(_train, _train.lastVec(), bestPredict, bestPredict, "Best error:", true, p.max_confusion_matrix_size, new water.api.ConfusionMatrix(), null, null);
Log.info("Best_model's error : " + bestErr * 100 + "%.");
compareVal(bestErr, best_err, abseps, releps);
} finally {
if (bestPredict != null)
bestPredict.delete();
}
}
Log.info("Parameters combination " + num_runs + ": PASS");
} finally {
// cleanup
// (also reached through the 'continue' statements at the top of the try block)
if (mymodel != null) {
mymodel.delete_best_model();
mymodel.delete();
}
// NOTE(review): _train/_test are not reset to null here, so the next combination deletes them again
if (_train != null)
_train.delete();
if (_test != null)
_test.delete();
if (frame != null)
frame.delete();
if (fr != null)
fr.delete();
if (p != null)
p.delete();
if (trainPredict != null)
trainPredict.delete();
if (testPredict != null)
testPredict.delete();
}
}
}
}
}
}
}
}
}
}
}
}
}
}
use of hex.deeplearning.DeepLearning in project h2o-2 by h2oai.
the class DeepLearningProstateTest method runFraction.
/**
 * Exercises DeepLearning over a large grid of training options on the prostate and iris data sets.
 *
 * For every selected combination a first model is built, training is then continued from that
 * model via checkpoint restart, and the restarted model is checked: job state, HTML generation,
 * best-model consistency and - for classifiers - consistency of AUC, confusion matrices and
 * {@code calcError}.
 *
 * @param fraction selection threshold: a combination is skipped whenever
 *                 {@code fraction < rng.nextFloat()}, so values >= 1 run every combination
 *                 and smaller values run a random subset
 */
public void runFraction(float fraction) {
  long seed = 0xDECAF;
  // NOTE: the order of the rng draws in this method determines which combinations run - do not reorder
  Random rng = new Random(seed);
  String[] datasets = new String[2];
  int[][] responses = new int[datasets.length][];
  datasets[0] = "smalldata/./logreg/prostate.csv";
  //CAPSULE (binomial), AGE (regression), GLEASON (multi-class)
  responses[0] = new int[] { 1, 2, 8 };
  datasets[1] = "smalldata/iris/iris.csv";
  //Iris-type (multi-class)
  responses[1] = new int[] { 4 };
  // testcount: combinations actually run and passed; count: all combinations visited
  int testcount = 0;
  int count = 0;
  for (int i = 0; i < datasets.length; ++i) {
    String dataset = datasets[i];
    Key file = NFSFileVec.make(find_test_file(dataset));
    Frame frame = ParseDataset2.parse(Key.make(), new Key[] { file });
    // the same file parsed a second time serves as a "different" validation frame
    Key vfile = NFSFileVec.make(find_test_file(dataset));
    Frame vframe = ParseDataset2.parse(Key.make(), new Key[] { vfile });
    try {
      for (boolean replicate : new boolean[] { true, false }) {
        for (boolean load_balance : new boolean[] { true, false }) {
          for (boolean shuffle : new boolean[] { true, false }) {
            for (boolean balance_classes : new boolean[] { true, false }) {
              for (int resp : responses[i]) {
                for (DeepLearning.ClassSamplingMethod csm : new DeepLearning.ClassSamplingMethod[] { DeepLearning.ClassSamplingMethod.Stratified, DeepLearning.ClassSamplingMethod.Uniform }) {
                  for (int scoretraining : new int[] { 200, 20, 0 }) {
                    for (int scorevalidation : new int[] { 200, 20, 0 }) {
                      for (int vf : new int[] {
                          0, //no validation
                          1, //same as source
                          -1 //different validation frame
                      }) {
                        for (int n_folds : new int[] { 0, 2 }) {
                          // cross-validation is only combined with "no validation frame"
                          if (n_folds != 0 && vf != 0)
                            continue;
                          //keep_cv_splits is only tested with false, otherwise it leaks
                          for (boolean keep_cv_splits : new boolean[] { false }) {
                            for (boolean override_with_best_model : new boolean[] { false, true }) {
                              for (int train_samples_per_iteration : new int[] {
                                  -2, //auto-tune
                                  -1, //N epochs per iteration
                                  0, //1 epoch per iteration
                                  rng.nextInt(200), // <1 epoch per iteration
                                  500 //>1 epoch per iteration
                              }) {
                                DeepLearningModel model1 = null, model2 = null;
                                Key dest = null, dest_tmp = null;
                                count++;
                                // random sub-sampling of the grid
                                if (fraction < rng.nextFloat())
                                  continue;
                                try {
                                  Log.info("**************************)");
                                  Log.info("Starting test #" + count);
                                  Log.info("**************************)");
                                  final double epochs = 7 + rng.nextDouble() + rng.nextInt(4);
                                  final int[] hidden = new int[] { 1 + rng.nextInt(4), 1 + rng.nextInt(6) };
                                  //no validation
                                  Frame valid = null;
                                  if (vf == 1)
                                    //use the same frame for validation
                                    valid = frame;
                                  else if (vf == -1)
                                    //different validation frame (here: from the same file)
                                    valid = vframe;
                                  // build the model, with all kinds of shuffling/rebalancing/sampling
                                  dest_tmp = Key.make("first");
                                  {
                                    Log.info("Using seed: " + seed);
                                    DeepLearning p = new DeepLearning();
                                    p.checkpoint = null;
                                    p.destination_key = dest_tmp;
                                    p.source = frame;
                                    p.response = frame.vecs()[resp];
                                    p.validation = valid;
                                    p.hidden = hidden;
                                    // prostate column 2 (AGE) is the regression response
                                    if (i == 0 && resp == 2)
                                      p.classification = false;
                                    // p.best_model_key = best_model_key;
                                    p.override_with_best_model = override_with_best_model;
                                    p.epochs = epochs;
                                    p.n_folds = n_folds;
                                    p.keep_cross_validation_splits = keep_cv_splits;
                                    p.seed = seed;
                                    p.train_samples_per_iteration = train_samples_per_iteration;
                                    p.force_load_balance = load_balance;
                                    p.replicate_training_data = replicate;
                                    p.shuffle_training_data = shuffle;
                                    p.score_training_samples = scoretraining;
                                    p.score_validation_samples = scorevalidation;
                                    // no early stopping
                                    p.classification_stop = -1;
                                    p.regression_stop = -1;
                                    p.balance_classes = balance_classes;
                                    p.quiet_mode = true;
                                    p.score_validation_sampling = csm;
                                    try {
                                      p.invoke();
                                    } catch (Throwable t) {
                                      t.printStackTrace();
                                      throw new RuntimeException(t);
                                    } finally {
                                      p.delete();
                                    }
                                    model1 = UKV.get(dest_tmp);
                                    // either training ran past the requested epochs (whole-epoch iterations),
                                    // or the epoch counter is within 20% of the requested number of epochs
                                    assert (((p.train_samples_per_iteration <= 0 || p.train_samples_per_iteration >= frame.numRows()) && model1.epoch_counter > epochs) || Math.abs(model1.epoch_counter - epochs) / epochs < 0.20);
                                    if (n_folds != 0) // test HTML of cv models
                                    {
                                      for (Key k : model1.get_params().xval_models) {
                                        DeepLearningModel cv_model = UKV.get(k);
                                        StringBuilder sb = new StringBuilder();
                                        cv_model.generateHTML("cv", sb);
                                        cv_model.delete_best_model();
                                        cv_model.delete();
                                      }
                                    }
                                  }
                                  // Do some more training via checkpoint restart
                                  // For n_folds, continue without n_folds (not yet implemented) - from now on, model2 will have n_folds=0...
                                  dest = Key.make("restart");
                                  DeepLearning p = new DeepLearning();
                                  //this actually *requires* frame to also still be in UKV (because of DataInfo...)
                                  final DeepLearningModel tmp_model = UKV.get(dest_tmp);
                                  // the checkpoint model must exist before it is inspected; checked with JUnit so
                                  // that the check is also active when JVM assertions are disabled
                                  Assert.assertNotNull(tmp_model);
                                  //HEX-1817
                                  Assert.assertTrue(tmp_model.get_params().state == Job.JobState.DONE);
                                  Assert.assertTrue(tmp_model.model_info().get_processed_total() >= frame.numRows() * epochs);
                                  p.checkpoint = dest_tmp;
                                  p.destination_key = dest;
                                  p.n_folds = 0;
                                  p.source = frame;
                                  p.validation = valid;
                                  p.response = frame.vecs()[resp];
                                  if (i == 0 && resp == 2)
                                    p.classification = false;
                                  p.override_with_best_model = override_with_best_model;
                                  p.epochs = epochs;
                                  p.seed = seed;
                                  p.train_samples_per_iteration = train_samples_per_iteration;
                                  try {
                                    p.invoke();
                                  } catch (Throwable t) {
                                    t.printStackTrace();
                                    throw new RuntimeException(t);
                                  } finally {
                                    p.delete();
                                  }
                                  // score and check result (on full data)
                                  //this actually *requires* frame to also still be in UKV (because of DataInfo...)
                                  model2 = UKV.get(dest);
                                  //HEX-1817
                                  Assert.assertTrue(model2.get_params().state == Job.JobState.DONE);
                                  // test HTML
                                  {
                                    StringBuilder sb = new StringBuilder();
                                    model2.generateHTML("test", sb);
                                  }
                                  // score and check result of the best_model
                                  if (model2.actual_best_model_key != null) {
                                    final DeepLearningModel best_model = UKV.get(model2.actual_best_model_key);
                                    //HEX-1817
                                    Assert.assertTrue(best_model.get_params().state == Job.JobState.DONE);
                                    // test HTML
                                    {
                                      StringBuilder sb = new StringBuilder();
                                      best_model.generateHTML("test", sb);
                                    }
                                    if (override_with_best_model) {
                                      Assert.assertEquals(best_model.error(), model2.error(), 0);
                                    }
                                  }
                                  // without a validation frame, score on the training frame
                                  if (valid == null)
                                    valid = frame;
                                  double threshold = 0;
                                  if (model2.isClassifier()) {
                                    Frame pred = null, pred2 = null;
                                    try {
                                      pred = model2.score(valid);
                                      StringBuilder sb = new StringBuilder();
                                      AUC auc = new AUC();
                                      double error = 0;
                                      // binary
                                      if (model2.nclasses() == 2) {
                                        auc.actual = valid;
                                        assert (resp == 1);
                                        auc.vactual = valid.vecs()[resp];
                                        auc.predict = pred;
                                        auc.vpredict = pred.vecs()[2];
                                        auc.invoke();
                                        auc.toASCII(sb);
                                        AUCData aucd = auc.data();
                                        threshold = aucd.threshold();
                                        error = aucd.err();
                                        Log.info(sb);
                                        // check that auc.cm() is the right CM
                                        Assert.assertEquals(new ConfusionMatrix(aucd.cm()).err(), error, 1e-15);
                                        // check that calcError() is consistent as well (for CM=null, AUC!=null)
                                        Assert.assertEquals(model2.calcError(valid, auc.vactual, pred, pred, "training", false, 0, null, auc, null), error, 1e-15);
                                      }
                                      // Compute CM
                                      double CMerrorOrig;
                                      {
                                        sb = new StringBuilder();
                                        water.api.ConfusionMatrix CM = new water.api.ConfusionMatrix();
                                        CM.actual = valid;
                                        CM.vactual = valid.vecs()[resp];
                                        CM.predict = pred;
                                        CM.vpredict = pred.vecs()[0];
                                        CM.invoke();
                                        sb.append("\n");
                                        sb.append("Threshold: " + "default\n");
                                        CM.toASCII(sb);
                                        Log.info(sb);
                                        CMerrorOrig = new ConfusionMatrix(CM.cm).err();
                                      }
                                      // confirm that orig CM was made with threshold 0.5
                                      // put pred2 into UKV, and allow access
                                      pred2 = new Frame(Key.make("pred2"), pred.names(), pred.vecs());
                                      pred2.delete_and_lock(null);
                                      pred2.unlock(null);
                                      if (model2.nclasses() == 2) {
                                        // make labels with 0.5 threshold for binary classifier
                                        Env ev = Exec2.exec("pred2[,1]=pred2[,3]>=" + 0.5);
                                        try {
                                          pred2 = ev.popAry();
                                          String skey = ev.key();
                                          ev.subRef(pred2, skey);
                                        } finally {
                                          if (ev != null)
                                            ev.remove_and_unlock();
                                        }
                                        water.api.ConfusionMatrix CM = new water.api.ConfusionMatrix();
                                        CM.actual = valid;
                                        CM.vactual = valid.vecs()[1];
                                        CM.predict = pred2;
                                        CM.vpredict = pred2.vecs()[0];
                                        CM.invoke();
                                        sb = new StringBuilder();
                                        sb.append("\n");
                                        sb.append("Threshold: " + 0.5 + "\n");
                                        CM.toASCII(sb);
                                        Log.info(sb);
                                        double threshErr = new ConfusionMatrix(CM.cm).err();
                                        Assert.assertEquals(threshErr, CMerrorOrig, 1e-15);
                                        // make labels with AUC-given threshold for best F1
                                        ev = Exec2.exec("pred2[,1]=pred2[,3]>=" + threshold);
                                        try {
                                          pred2 = ev.popAry();
                                          String skey = ev.key();
                                          ev.subRef(pred2, skey);
                                        } finally {
                                          if (ev != null)
                                            ev.remove_and_unlock();
                                        }
                                        CM = new water.api.ConfusionMatrix();
                                        CM.actual = valid;
                                        CM.vactual = valid.vecs()[1];
                                        CM.predict = pred2;
                                        CM.vpredict = pred2.vecs()[0];
                                        CM.invoke();
                                        sb = new StringBuilder();
                                        sb.append("\n");
                                        sb.append("Threshold: ").append(threshold).append("\n");
                                        CM.toASCII(sb);
                                        Log.info(sb);
                                        double threshErr2 = new ConfusionMatrix(CM.cm).err();
                                        Assert.assertEquals(threshErr2, error, 1e-15);
                                      }
                                    } finally {
                                      if (pred != null)
                                        pred.delete();
                                      if (pred2 != null)
                                        pred2.delete();
                                    }
                                  }
                                  //classifier
                                  Log.info("Parameters combination " + count + ": PASS");
                                  testcount++;
                                } catch (Throwable t) {
                                  t.printStackTrace();
                                  throw new RuntimeException(t);
                                } finally {
                                  // remove both models together with their cross-validation and best models
                                  if (model1 != null) {
                                    model1.delete_xval_models();
                                    model1.delete_best_model();
                                    model1.delete();
                                  }
                                  if (model2 != null) {
                                    model2.delete_xval_models();
                                    model2.delete_best_model();
                                    model2.delete();
                                  }
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    } finally {
      frame.delete();
      vframe.delete();
    }
  }
  Log.info("\n\n=============================================");
  Log.info("Tested " + testcount + " out of " + count + " parameter combinations.");
  Log.info("=============================================");
}
use of hex.deeplearning.DeepLearning in project h2o-2 by h2oai.
the class DeepLearningReproducibilityTest method run.
/**
 * Checks the {@code reproducible} flag of DeepLearning on the weather data set.
 *
 * The same model is trained N times with {@code reproducible = true} and N times with
 * {@code reproducible = false}. In reproducible mode all runs must report the same validation
 * error and identical prediction frames; in non-reproducible mode the standard deviation of
 * the validation errors must stay below {@code 0.1 / sqrt(N)}.
 */
@Test
public void run() {
  // Fixed model seed: this is the value handed to every DeepLearning job below, so it is also
  // the value that gets logged.
  final long seed = 0xbebe;
  Log.info("");
  Log.info("STARTING.");
  Log.info("Using seed " + seed);
  final int N = 6;
  // validation error of the reproducible runs, kept for the comparison in non-reproducible mode
  float repro_error = 0;
  for (boolean repro : new boolean[] { true, false }) {
    // repeat index -> validation error, for the current mode only
    Map<Integer, Float> repeatErrs = new TreeMap<Integer, Float>();
    Frame[] preds = new Frame[N];
    // the prediction frames must be released even if one of the repeats fails
    try {
      for (int repeat = 0; repeat < N; ++repeat) {
        // per-repeat handles, so that a failing repeat never re-deletes objects of an earlier one
        DeepLearningModel mymodel = null;
        Frame train = null;
        Frame test = null;
        Frame data = null;
        try {
          Key file = NFSFileVec.make(find_test_file("smalldata/weather.csv"));
          // Key file = NFSFileVec.make(find_test_file("smalldata/mnist/test.csv.gz"));
          data = ParseDataset2.parse(Key.make("data.hex"), new Key[] { file });
          // Create holdout test data (split ratio 0.75 for the first part)
          FrameSplitter fs = new FrameSplitter(data, new float[] { 0.75f });
          H2O.submitTask(fs).join();
          Frame[] train_test = fs.getResult();
          train = train_test[0];
          test = train_test[1];
          // Build a regularized DL model, validated on the holdout split
          DeepLearning p = new DeepLearning();
          p.source = train;
          p.validation = test;
          p.response = train.lastVec();
          //for weather data
          p.ignored_cols = new int[] { 1, 22 };
          p.activation = DeepLearning.Activation.RectifierWithDropout;
          p.hidden = new int[] { 32, 58 };
          p.l1 = 1e-5;
          p.l2 = 3e-5;
          p.seed = seed;
          p.input_dropout_ratio = 0.2;
          p.hidden_dropout_ratios = new double[] { 0.4, 0.1 };
          p.epochs = 3.32;
          p.quiet_mode = true;
          p.reproducible = repro;
          try {
            Log.info("Starting with #" + repeat);
            p.invoke();
          } finally {
            // failures are reported once by the enclosing catch block
            p.delete();
          }
          // Extract the scoring on validation set from the model
          mymodel = UKV.get(p.dest());
          preds[repeat] = mymodel.score(test);
          repeatErrs.put(repeat, mymodel.error());
        } catch (Throwable t) {
          t.printStackTrace();
          throw new RuntimeException(t);
        } finally {
          // cleanup
          if (mymodel != null) {
            mymodel.delete_xval_models();
            mymodel.delete_best_model();
            mymodel.delete();
          }
          if (train != null)
            train.delete();
          if (test != null)
            test.delete();
          if (data != null)
            data.delete();
        }
      }
      // report for this mode only (a fresh builder avoids repeating the previous mode's table)
      StringBuilder sb = new StringBuilder();
      sb.append("Reproducibility: " + (repro ? "on" : "off") + "\n");
      sb.append("Repeat # --> Validation Error\n");
      for (Map.Entry<Integer, Float> e : repeatErrs.entrySet())
        sb.append(e.getKey()).append(" --> ").append(e.getValue()).append("\n");
      sb.append('\n');
      Log.info(sb.toString());
      if (repro) {
        // check reproducibility: every run must match the first one exactly
        for (Float error : repeatErrs.values()) {
          Assert.assertTrue(error.equals(repeatErrs.get(0)));
        }
        for (Frame f : preds) {
          Assert.assertTrue(f.isIdentical(preds[0]));
        }
        repro_error = repeatErrs.get(0);
      } else {
        // check standard deviation of non-reproducible mode
        double mean = 0;
        for (Float error : repeatErrs.values()) {
          mean += error;
        }
        mean /= N;
        Log.info("mean error: " + mean);
        double stddev = 0;
        for (Float error : repeatErrs.values()) {
          stddev += (error - mean) * (error - mean);
        }
        stddev /= N;
        stddev = Math.sqrt(stddev);
        Log.info("standard deviation: " + stddev);
        Assert.assertTrue(stddev < 0.1 / Math.sqrt(N));
        Log.info("difference to reproducible mode: " + Math.abs(mean - repro_error) / stddev + " standard deviations");
      }
    } finally {
      for (Frame f : preds)
        if (f != null)
          f.delete();
    }
  }
}
use of hex.deeplearning.DeepLearning in project h2o-2 by h2oai.
the class DeepLearningSpiralsTest method run.
/**
 * Trains a Tanh network with one hidden layer of 100 units on the two-spirals data set, once
 * with sparse and once with dense data handling, and fails if the classification error
 * reported by the resulting model is not below 0.025.
 */
@Test
public void run() {
  Key file = NFSFileVec.make(find_test_file("smalldata/neural/two_spiral.data"));
  Frame frame = ParseDataset2.parse(Key.make(), new Key[] { file });
  Key dest = Key.make("spirals2");
  // release the parsed frame even when one of the runs fails
  try {
    for (boolean sparse : new boolean[] { true, false }) {
      for (boolean col_major : new boolean[] { false }) {
        // col_major is only combined with sparse
        if (!sparse && col_major)
          continue;
        DeepLearningModel mymodel = null;
        try {
          // build the model
          DeepLearning p = new DeepLearning();
          p.seed = 0xbabe;
          p.epochs = 10000;
          p.hidden = new int[] { 100 };
          p.sparse = sparse;
          p.col_major = col_major;
          p.activation = DeepLearning.Activation.Tanh;
          p.max_w2 = Float.POSITIVE_INFINITY;
          p.l1 = 0;
          p.l2 = 0;
          p.initial_weight_distribution = DeepLearning.InitialWeightDistribution.Normal;
          p.initial_weight_scale = 2.5;
          p.loss = DeepLearning.Loss.CrossEntropy;
          p.source = frame;
          p.response = frame.lastVec();
          p.validation = null;
          p.score_interval = 2;
          p.ignored_cols = null;
          //sync once per period
          p.train_samples_per_iteration = 0;
          p.quiet_mode = true;
          p.fast_mode = true;
          p.ignore_const_cols = true;
          p.nesterov_accelerated_gradient = true;
          p.classification = true;
          p.diagnostics = true;
          p.expert_mode = true;
          p.score_training_samples = 1000;
          p.score_validation_samples = 10000;
          p.shuffle_training_data = false;
          p.force_load_balance = false;
          p.replicate_training_data = false;
          p.destination_key = dest;
          p.adaptive_rate = true;
          p.reproducible = true;
          p.rho = 0.99;
          p.epsilon = 5e-3;
          try {
            p.invoke();
          } finally {
            // remove the job itself; the trained model stays available under 'dest'
            p.delete();
          }
          // score and check result
          mymodel = UKV.get(dest);
          double error = mymodel.error();
          if (error >= 0.025) {
            Assert.fail("Classification error is not less than 0.025, but " + error + ".");
          }
        } finally {
          // also runs when the assertion above fails, so the model is never left behind
          if (mymodel != null) {
            mymodel.delete_best_model();
            mymodel.delete();
          }
        }
      }
    }
  } finally {
    frame.delete();
  }
}
use of hex.deeplearning.DeepLearning in project h2o-2 by h2oai.
the class DeepLearningVsNeuralNet method compare.
@Ignore
@Test
public void compare() throws Exception {
final long seed = 0xc0ffee;
Random rng = new Random(seed);
DeepLearning.Activation[] activations = { DeepLearning.Activation.Maxout, DeepLearning.Activation.MaxoutWithDropout, DeepLearning.Activation.Tanh, DeepLearning.Activation.TanhWithDropout, DeepLearning.Activation.Rectifier, DeepLearning.Activation.RectifierWithDropout };
DeepLearning.Loss[] losses = { DeepLearning.Loss.MeanSquare, DeepLearning.Loss.CrossEntropy };
DeepLearning.InitialWeightDistribution[] dists = { DeepLearning.InitialWeightDistribution.Normal, DeepLearning.InitialWeightDistribution.Uniform, DeepLearning.InitialWeightDistribution.UniformAdaptive };
double[] initial_weight_scales = { 1e-3 + 1e-2 * rng.nextFloat() };
double[] holdout_ratios = { 0.7 + 0.2 * rng.nextFloat() };
int[][] hiddens = { { 1 }, { 1 + rng.nextInt(50) }, { 17, 13 }, { 20, 10, 5 } };
double[] rates = { 0.005 + 1e-2 * rng.nextFloat() };
int[] epochs = { 5 + rng.nextInt(5) };
double[] input_dropouts = { 0, rng.nextFloat() * 0.5 };
double p0 = 0.5 * rng.nextFloat();
long pR = 1000 + rng.nextInt(1000);
double p1 = 0.5 + 0.49 * rng.nextFloat();
double l1 = 1e-5 * rng.nextFloat();
double l2 = 1e-5 * rng.nextFloat();
// rng.nextInt(50);
float max_w2 = Float.POSITIVE_INFINITY;
double rate_annealing = 1e-7 + rng.nextFloat() * 1e-6;
boolean threaded = false;
int num_repeats = 1;
// TODO: test that Deep Learning and NeuralNet agree for Mnist dataset
// String[] files = { "smalldata/mnist/train.csv" };
// hiddens = new int[][]{ {50,50} };
// threaded = true;
// num_repeats = 5;
// TODO: test that Deep Learning and NeuralNet agree for covtype dataset
// String[] files = { "smalldata/covtype/covtype.20k.data.my" };
// hiddens = new int[][]{ {100,100} };
// epochs = new int[]{ 50 };
// threaded = true;
// num_repeats = 2;
String[] files = { "smalldata/iris/iris.csv", "smalldata/neural/two_spiral.data" };
for (DeepLearning.Activation activation : activations) {
for (DeepLearning.Loss loss : losses) {
for (DeepLearning.InitialWeightDistribution dist : dists) {
for (double scale : initial_weight_scales) {
for (double holdout_ratio : holdout_ratios) {
for (double input_dropout : input_dropouts) {
for (int[] hidden : hiddens) {
for (int epoch : epochs) {
for (double rate : rates) {
for (String file : files) {
for (boolean fast_mode : new boolean[] { true, false }) {
float reftrainerr = 0, trainerr = 0;
float reftesterr = 0, testerr = 0;
float[] a = new float[hidden.length + 2];
float[] b = new float[hidden.length + 2];
float[] ba = new float[hidden.length + 2];
float[] bb = new float[hidden.length + 2];
long numweights = 0, numbiases = 0;
for (int repeat = 0; repeat < num_repeats; ++repeat) {
long myseed = seed + repeat;
Log.info("");
Log.info("STARTING.");
Log.info("Running with " + activation.name() + " activation function and " + loss.name() + " loss function.");
Log.info("Initialization with " + dist.name() + " distribution and " + scale + " scale, holdout ratio " + holdout_ratio);
Log.info("Using seed " + seed);
Key kfile = NFSFileVec.make(find_test_file(file));
Frame frame = ParseDataset2.parse(Key.make(), new Key[] { kfile });
_train = sampleFrame(frame, (long) (frame.numRows() * holdout_ratio), seed);
_test = sampleFrame(frame, (long) (frame.numRows() * (1 - holdout_ratio)), seed + 1);
// Train new Deep Learning
Neurons[] neurons;
DeepLearningModel mymodel;
{
DeepLearning p = new DeepLearning();
p.source = (Frame) _train.clone();
p.response = _train.lastVec();
p.ignored_cols = null;
p.seed = myseed;
p.hidden = hidden;
p.adaptive_rate = false;
p.rho = 0;
p.epsilon = 0;
p.rate = rate;
p.activation = activation;
p.max_w2 = max_w2;
p.epochs = epoch;
p.input_dropout_ratio = input_dropout;
p.rate_annealing = rate_annealing;
p.loss = loss;
p.l1 = l1;
p.l2 = l2;
p.momentum_start = p0;
p.momentum_ramp = pR;
p.momentum_stable = p1;
p.initial_weight_distribution = dist;
p.initial_weight_scale = scale;
p.classification = true;
p.diagnostics = true;
p.validation = null;
p.quiet_mode = true;
p.fast_mode = fast_mode;
//sync once per period
p.train_samples_per_iteration = 0;
//same as old NeuralNet code
p.ignore_const_cols = false;
//same as old NeuralNet code
p.shuffle_training_data = false;
//same as old NeuralNet code
p.nesterov_accelerated_gradient = true;
//don't stop early -> need to compare against old NeuralNet code, which doesn't stop either
p.classification_stop = -1;
//keep 1 chunk for reproducibility
p.force_load_balance = false;
p.replicate_training_data = false;
p.single_node_mode = true;
p.invoke();
mymodel = UKV.get(p.dest());
neurons = DeepLearningTask.makeNeuronsForTesting(mymodel.model_info());
}
// Reference: NeuralNet
// Trains the legacy NeuralNet on the same training frame with the same hyper-parameters,
// leaving the trained layers in ls and the wrapped model in refmodel.
Layer[] ls;
NeuralNetModel refmodel;
NeuralNet p = new NeuralNet();
{
// inputs are the training vecs with the one at the last index removed
// (presumably what Utils.remove does - confirm); the last vec holds the labels
Vec[] data = Utils.remove(_train.vecs(), _train.vecs().length - 1);
Vec labels = _train.lastVec();
p.seed = myseed;
p.hidden = hidden;
p.rate = rate;
p.max_w2 = max_w2;
p.epochs = epoch;
p.input_dropout_ratio = input_dropout;
p.rate_annealing = rate_annealing;
p.l1 = l1;
p.l2 = l2;
p.momentum_start = p0;
p.momentum_ramp = pR;
p.momentum_stable = p1;
// map the DeepLearning weight distribution onto the same-named constant of the
// unqualified InitialWeightDistribution type (presumably NeuralNet's, via import)
if (dist == DeepLearning.InitialWeightDistribution.Normal)
p.initial_weight_distribution = InitialWeightDistribution.Normal;
else if (dist == DeepLearning.InitialWeightDistribution.Uniform)
p.initial_weight_distribution = InitialWeightDistribution.Uniform;
else if (dist == DeepLearning.InitialWeightDistribution.UniformAdaptive)
p.initial_weight_distribution = InitialWeightDistribution.UniformAdaptive;
p.initial_weight_scale = scale;
p.diagnostics = true;
p.fast_mode = fast_mode;
p.classification = true;
// same mapping for the loss function; any other loss leaves p.loss untouched
if (loss == DeepLearning.Loss.MeanSquare)
p.loss = Loss.MeanSquare;
else if (loss == DeepLearning.Loss.CrossEntropy)
p.loss = Loss.CrossEntropy;
// layer stack: [0] input, [1..hidden.length] hidden layers, [last] softmax output
ls = new Layer[hidden.length + 2];
ls[0] = new Layer.VecsInput(data, null);
// one hidden layer per entry of hidden, all of the requested activation type;
// p.activation is (re)assigned on every iteration, so it stays unset here when hidden is empty
for (int i = 0; i < hidden.length; ++i) {
if (activation == DeepLearning.Activation.Tanh) {
p.activation = NeuralNet.Activation.Tanh;
ls[1 + i] = new Layer.Tanh(hidden[i]);
} else if (activation == DeepLearning.Activation.TanhWithDropout) {
p.activation = Activation.TanhWithDropout;
ls[1 + i] = new Layer.TanhDropout(hidden[i]);
} else if (activation == DeepLearning.Activation.Rectifier) {
p.activation = Activation.Rectifier;
ls[1 + i] = new Layer.Rectifier(hidden[i]);
} else if (activation == DeepLearning.Activation.RectifierWithDropout) {
p.activation = Activation.RectifierWithDropout;
ls[1 + i] = new Layer.RectifierDropout(hidden[i]);
} else if (activation == DeepLearning.Activation.Maxout) {
p.activation = Activation.Maxout;
ls[1 + i] = new Layer.Maxout(hidden[i]);
} else if (activation == DeepLearning.Activation.MaxoutWithDropout) {
p.activation = Activation.MaxoutWithDropout;
ls[1 + i] = new Layer.MaxoutDropout(hidden[i]);
}
}
ls[ls.length - 1] = new Layer.VecSoftmax(labels, null);
// initialize every layer with the full stack, its own index and the parameters
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
// pick the trainer implementation according to the threaded flag
Trainer trainer;
if (threaded)
trainer = new Trainer.Threaded(ls, p.epochs, null, -1);
else
trainer = new Trainer.Direct(ls, p.epochs, null);
// start training and wait for it via join() before wrapping the layers in a model
trainer.start();
trainer.join();
refmodel = new NeuralNetModel(null, null, _train, ls, p);
}
/**
 * Accumulate per-layer sums of weights and biases for the hidden and output layers:
 * a/ba collect the reference NeuralNet values, b/bb the Deep Learning values.
 * numweights and numbiases count every entry that was added.
 */
for (int n = 1; n < ls.length; ++n) {
    final Neurons dlLayer = neurons[n];
    final Layer refLayer = ls[n];
    final int fanOut = dlLayer._a.size();
    final int fanIn = dlLayer._previous._a.size();
    for (int unit = 0; unit < fanOut; unit++) {
        // weights of one unit sit contiguously, starting at unit * fanIn
        final int rowStart = unit * fanIn;
        for (int src = 0; src < fanIn; src++) {
            a[n] += refLayer._w[rowStart + src];
            b[n] += dlLayer._w.raw()[rowStart + src];
            numweights++;
        }
        ba[n] += refLayer._b[unit];
        bb[n] += dlLayer._b.get(unit);
        numbiases++;
    }
}
/**
 * Compare predictions
 * Note: Reference and H2O each do their internal data normalization,
 * so we must use their "own" test data, which is assumed to be created correctly.
 */
water.api.ConfusionMatrix CM = new water.api.ConfusionMatrix();
// Deep Learning scoring: score the training and the test frame, log both confusion
// matrices and accumulate their error rates. CM is left holding the test-set matrix,
// which the assertions after the NeuralNet scoring read.
{
    // training set
    //[0] is label, [1]...[4] are the probabilities
    Frame fpreds = mymodel.score(_train);
    CM = new water.api.ConfusionMatrix();
    CM.actual = _train;
    CM.vactual = _train.lastVec();
    CM.predict = fpreds;
    CM.vpredict = fpreds.vecs()[0];
    CM.invoke();
    StringBuilder sb = new StringBuilder();
    // render the matrix before logging it; without this call the buffer stays empty
    // and only a blank line is logged for the training set
    CM.toASCII(sb);
    trainerr += new ConfusionMatrix(CM.cm).err();
    for (String s : sb.toString().split("\n")) Log.info(s);
    fpreds.delete();

    // test set
    //[0] is label, [1]...[4] are the probabilities
    Frame fpreds2 = mymodel.score(_test);
    CM = new water.api.ConfusionMatrix();
    CM.actual = _test;
    CM.vactual = _test.lastVec();
    CM.predict = fpreds2;
    CM.vpredict = fpreds2.vecs()[0];
    CM.invoke();
    sb = new StringBuilder();
    CM.toASCII(sb);
    testerr += new ConfusionMatrix(CM.cm).err();
    for (String s : sb.toString().split("\n")) Log.info(s);
    fpreds2.delete();
}
// NeuralNet scoring
// Evaluates the trained reference layers on the training set and then on the test set;
// cm receives the test-set confusion matrix and is compared against CM.cm afterwards.
long[][] cm;
{
Log.info("\nNeuralNet Scoring:");
//training set
// the layers still reference the training vecs here; no confusion matrix is requested (null)
NeuralNet.Errors train = NeuralNet.eval(ls, 0, null);
reftrainerr += train.classification;
//test set
// NOTE(review): the adapted frames are only used for the adapted[1].delete() call below;
// the vecs fed to the network are taken from _test directly - confirm this is intended
final Frame[] adapted = refmodel.adapt(_test, false);
Vec[] data = Utils.remove(_test.vecs(), _test.vecs().length - 1);
Vec labels = _test.vecs()[_test.vecs().length - 1];
// re-point the input layer and the softmax output layer at the test data in place
Layer.VecsInput input = (Layer.VecsInput) ls[0];
input.vecs = data;
input._len = data[0].length();
((Layer.VecSoftmax) ls[ls.length - 1]).vec = labels;
//WARNING: only works if training set is large enough to have all classes
// the class count is read from the output layer's unit count
int classes = ls[ls.length - 1].units;
cm = new long[classes][classes];
NeuralNet.Errors test = NeuralNet.eval(ls, 0, cm);
Log.info("\nNeuralNet Confusion Matrix:");
Log.info(new ConfusionMatrix(cm).toString());
reftesterr += test.classification;
adapted[1].delete();
}
Assert.assertEquals(cm[0][0], CM.cm[0][0]);
Assert.assertEquals(cm[1][0], CM.cm[1][0]);
Assert.assertEquals(cm[0][1], CM.cm[0][1]);
Assert.assertEquals(cm[1][1], CM.cm[1][1]);
// cleanup
mymodel.delete();
refmodel.delete();
_train.delete();
_test.delete();
frame.delete();
}
// turn the error sums accumulated over all repeats into averages
trainerr /= (float) num_repeats;
reftrainerr /= (float) num_repeats;
testerr /= (float) num_repeats;
reftesterr /= (float) num_repeats;
/**
 * Tolerances: the threaded reference trainer gets much looser bounds than the direct one
 */
final float abseps = threaded ? 1e-2f : 1e-7f;
final float releps = threaded ? 1e-2f : 1e-5f;
// training set scoring
Log.info("NeuralNet train error " + reftrainerr);
Log.info("Deep Learning train error " + trainerr);
compareVal(reftrainerr, trainerr, abseps, releps);
// test set scoring
Log.info("NeuralNet test error " + reftesterr);
Log.info("Deep Learning test error " + testerr);
// compare the TEST errors here; re-checking the training errors would leave them unverified
compareVal(reftesterr, testerr, abseps, releps);
// mean weights/biases
// both implementations are divided by the same counters, so the comparison is like-for-like
for (int n = 1; n < hidden.length + 2; ++n) {
    Log.info("NeuralNet mean weight for layer " + n + ": " + a[n] / numweights);
    Log.info("Deep Learning mean weight for layer " + n + ": " + b[n] / numweights);
    Log.info("NeuralNet mean bias for layer " + n + ": " + ba[n] / numbiases);
    Log.info("Deep Learning mean bias for layer " + n + ": " + bb[n] / numbiases);
    compareVal(a[n] / numweights, b[n] / numweights, abseps, releps);
    compareVal(ba[n] / numbiases, bb[n] / numbiases, abseps, releps);
}
}
}
}
}
}
}
}
}
}
}
}
}
Aggregations