Use of hex.Layer in project h2o-2 by h2oai.
Class NeuralNetMnistPretrain, method preTrain:
private final void preTrain(Layer[] ls, int index, int epochs) {
  // Build a network with the same layers below 'index', and an auto-encoder at the top
  Layer[] pre = new Layer[index + 2];
  VecsInput input = (VecsInput) ls[0];
  pre[0] = new VecsInput(input.vecs, input);
  // Clone the parameters
  pre[0].init(pre, 0, ls[0].params);
  for (int i = 1; i < index; i++) {
    //pre[i] = new Layer.Rectifier(ls[i].units);
    pre[i] = new Layer.Tanh(ls[i].units);
    Layer.shareWeights(ls[i], pre[i]);
    // Share the parameters
    pre[i].init(pre, i, ls[i].params);
    // Turn off training for these layers
    pre[i].params.rate = 0;
  }
  // The auto-encoder is a layer and a reverse layer on top
  //pre[index] = new Layer.Rectifier(ls[index].units);
  //pre[index + 1] = new Layer.RectifierPrime(ls[index - 1].units);
  pre[index] = new Layer.Tanh(ls[index].units);
  pre[index].init(pre, index, ls[index].params);
  pre[index].params.rate = 1e-5;
  pre[index + 1] = new Layer.TanhPrime(ls[index - 1].units);
  pre[index + 1].init(pre, index + 1, pre[index].params);
  pre[index + 1].params.rate = 1e-5;
  Layer.shareWeights(ls[index], pre[index]);
  Layer.shareWeights(ls[index], pre[index + 1]);
  _trainer = new Trainer.Direct(pre, epochs, self());
  // Basic visualization of images and weights
  JFrame frame = new JFrame("H2O Pre-Training");
  frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
  MnistCanvas canvas = new MnistCanvas(_trainer);
  frame.setContentPane(canvas.init());
  frame.pack();
  frame.setLocationRelativeTo(null);
  frame.setVisible(true);
  _trainer.start();
  _trainer.join();
}
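The method above pre-trains one hidden layer as a tied-weight auto-encoder. A driver that applies it greedily, one hidden layer at a time, might look like the following sketch; the loop bounds assume the layer layout produced by build below (input at index 0, softmax last), and the method name and timing output are illustrative additions, not part of the original.

private void preTrainAll(Layer[] ls, int epochsPerLayer) {
  // Greedily pre-train each hidden layer in turn, bottom-up
  for (int i = 1; i < ls.length - 1; i++) {
    System.out.println("Pre-training hidden layer " + i);
    long start = System.nanoTime();
    preTrain(ls, i, epochsPerLayer); // the method shown above
    System.out.println((int) ((System.nanoTime() - start) / 1e6) + " ms");
  }
}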
Use of hex.Layer in project h2o-2 by h2oai.
Class NeuralNetMnistPretrain, method build:
@Override
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
  Layer[] ls = new Layer[4];
  ls[0] = new VecsInput(data, inputStats);
  // ls[1] = new Layer.RectifierDropout(1024);
  // ls[2] = new Layer.RectifierDropout(1024);
  ls[1] = new Layer.Tanh(50);
  ls[2] = new Layer.Tanh(50);
  ls[3] = new VecSoftmax(labels, outputStats);
  // Parameters for the MNIST run
  NeuralNet p = new NeuralNet();
  // Only used for the NN run after pretraining
  p.rate = 0.01;
  p.activation = NeuralNet.Activation.Tanh;
  p.loss = NeuralNet.Loss.CrossEntropy;
  // p.rate_annealing = 1e-6f;
  // p.max_w2 = 15;
  // p.momentum_start = 0.5f;
  // p.momentum_ramp = 60000 * 300;
  // p.momentum_stable = 0.99f;
  // p.l1 = .00001f;
  // p.l2 = .00f;
  p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
  for (int i = 0; i < ls.length; i++) {
    ls[i].init(ls, i, p);
  }
  return ls;
}
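Taken together with preTrain, a plausible end-to-end flow is to build the Tanh stack, pre-train each hidden layer, then fine-tune the whole network. The sketch below reuses only calls visible in these snippets (the Trainer.Direct signature appears in preTrain); preTrainAll is the hypothetical driver sketched earlier, and the epoch count is illustrative.

Layer[] ls = build(train, trainLabels, null, null); // 784-50-50-10 Tanh stack for MNIST
preTrainAll(ls, 1);                                 // one auto-encoder pass per hidden layer
_trainer = new Trainer.Direct(ls, 10, self());      // fine-tune end to end at p.rate = 0.01
_trainer.start();
_trainer.join();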
Use of hex.Layer in project h2o-2 by h2oai.
Class NeuralNetMnist, method build:
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
  // Same parameters as in test_NN_mnist.py
  Layer[] ls = new Layer[5];
  ls[0] = new VecsInput(data, inputStats);
  ls[1] = new Layer.RectifierDropout(117);
  ls[2] = new Layer.RectifierDropout(131);
  ls[3] = new Layer.RectifierDropout(129);
  ls[ls.length - 1] = new VecSoftmax(labels, outputStats);
  NeuralNet p = new NeuralNet();
  p.seed = 98037452452L;
  p.rate = 0.005;
  p.rate_annealing = 1e-6;
  p.activation = NeuralNet.Activation.RectifierWithDropout;
  p.loss = NeuralNet.Loss.CrossEntropy;
  p.input_dropout_ratio = 0.2;
  p.max_w2 = 15;
  p.epochs = 2;
  p.l1 = 1e-5;
  p.l2 = 0.0000001;
  p.momentum_start = 0.5;
  p.momentum_ramp = 100000;
  p.momentum_stable = 0.99;
  p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
  p.classification = true;
  p.diagnostics = true;
  p.expert_mode = true;
  for (int i = 0; i < ls.length; i++) {
    ls[i].init(ls, i, p);
  }
  return ls;
}
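The same architecture can also be expressed through the NeuralNet job parameters that startTrain (shown next) consumes when it builds the layer stack itself. This sketch is inferred from the fields startTrain reads (hidden, activation, epochs) and is not a verified invocation of the public API.

NeuralNet job = new NeuralNet();
job.hidden = new int[] { 117, 131, 129 };                   // three hidden layers, as above
job.activation = NeuralNet.Activation.RectifierWithDropout; // selects RectifierDropout in startTrain
job.loss = NeuralNet.Loss.CrossEntropy;
job.epochs = 2;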
Use of hex.Layer in project h2o-2 by h2oai.
Class NeuralNet, method startTrain:
void startTrain() {
  logStart();
  running = true;
  // Vec[] vecs = Utils.append(_train, response);
  // reChunk(vecs);
  // final Vec[] train = new Vec[vecs.length - 1];
  // System.arraycopy(vecs, 0, train, 0, train.length);
  // final Vec trainResp = classification ? vecs[vecs.length - 1].toEnum() : vecs[vecs.length - 1];
  final Vec[] train = _train;
  final Vec trainResp = classification ? response.toEnum() : response;
  final Layer[] ls = new Layer[hidden.length + 2];
  ls[0] = new VecsInput(train, null);
  for (int i = 0; i < hidden.length; i++) {
    switch (activation) {
      case Tanh:
        ls[i + 1] = new Tanh(hidden[i]);
        break;
      case TanhWithDropout:
        ls[i + 1] = new TanhDropout(hidden[i]);
        break;
      case Rectifier:
        ls[i + 1] = new Rectifier(hidden[i]);
        break;
      case RectifierWithDropout:
        ls[i + 1] = new RectifierDropout(hidden[i]);
        break;
      case Maxout:
        ls[i + 1] = new Maxout(hidden[i]);
        break;
      case MaxoutWithDropout:
        ls[i + 1] = new MaxoutDropout(hidden[i]);
        break;
    }
  }
  if (classification)
    ls[ls.length - 1] = new VecSoftmax(trainResp, null);
  else
    ls[ls.length - 1] = new VecLinear(trainResp, null);
  // Copy parameters from NeuralNet, and set previous/input layer links
  for (int i = 0; i < ls.length; i++)
    ls[i].init(ls, i, this);
  final Key sourceKey = Key.make(input("source"));
  final Frame frame = new Frame(_names, train);
  frame.add(_responseName, trainResp);
  final Errors[] trainErrors0 = new Errors[] { new Errors() };
  final Errors[] validErrors0 = validation == null ? null : new Errors[] { new Errors() };
  NeuralNetModel model = new NeuralNetModel(destination_key, sourceKey, frame, ls, this);
  model.training_errors = trainErrors0;
  model.validation_errors = validErrors0;
  model.delete_and_lock(self());
  final Frame[] adapted = validation == null ? null : model.adapt(validation, false);
  final Trainer trainer;
  final long num_rows = source.numRows();
  if (mode == SingleThread) {
    Log.info("Entering single-threaded execution mode");
    trainer = new Trainer.Direct(ls, epochs, self());
  } else {
    // One node works on the first batch of points serially for improved stability
    if (warmup_samples > 0) {
      Log.info("Training the first " + warmup_samples + " samples in serial for improved stability.");
      Trainer warmup = new Trainer.Direct(ls, (double) warmup_samples / num_rows, self());
      warmup.start();
      warmup.join();
      // TODO: for MapReduce, send weights from the master VM to all other VMs
    }
    if (mode == SingleNode) {
      Log.info("Entering single-node (multi-threaded Hogwild) execution mode.");
      trainer = new Trainer.Threaded(ls, epochs, self(), -1);
    } else if (mode == MapReduce) {
      if (warmup_samples > 0 && mode == MapReduce) {
        Log.info("Multi-threaded warmup with " + warmup_samples + " samples.");
        Trainer warmup = new Trainer.Threaded(ls, (double) warmup_samples / num_rows, self(), -1);
        warmup.start();
        warmup.join();
        // TODO: for MapReduce, send weights from the master VM to all other VMs
      }
      Log.info("Entering multi-node (MapReduce + multi-threaded Hogwild) execution mode.");
      trainer = new Trainer.MapReduce(ls, epochs, self());
    } else
      throw new RuntimeException("Invalid execution mode.");
  }
  Log.info("Running for " + epochs + " epochs.");
  final NeuralNet nn = this;
  // Use a separate thread for monitoring (blocked most of the time)
  Thread monitor = new Thread() {
    Errors[] trainErrors = trainErrors0, validErrors = validErrors0;

    @Override
    public void run() {
      try {
        Vec[] valid = null;
        Vec validResp = null;
        if (validation != null) {
          assert adapted != null;
          final Vec[] vs = adapted[0].vecs();
          valid = Arrays.copyOf(vs, vs.length - 1);
          System.arraycopy(adapted[0].vecs(), 0, valid, 0, valid.length);
          validResp = vs[vs.length - 1];
        }
        // Score the model every 2 seconds (or less often, if scoring takes longer)
        final long num_samples_total = (long) (Math.ceil(num_rows * epochs));
        long num = -1, last_eval = runTimeMs();
        do {
          // Time between evaluations
          final long interval = (long) (score_interval * 1000);
          long time_taken = runTimeMs() - last_eval;
          if (num >= 0 && time_taken < interval) {
            Thread.sleep(interval - time_taken);
          }
          last_eval = runTimeMs();
          num = eval(valid, validResp);
          if (num >= num_samples_total)
            break;
          if (mode != MapReduce) {
            if (!isRunning(self()) || !running)
              break;
          } else {
            // MapReduce calls cancel() early; we are waiting for running = false
            if (!running)
              break;
          }
        } while (true);
        // Remove validation data
        if (adapted != null && adapted[1] != null)
          adapted[1].delete();
        Log.info("Training finished.");
      } catch (Exception ex) {
        cancel(ex);
      }
    }

    private long eval(Vec[] valid, Vec validResp) {
      long[][] cm = null;
      if (classification) {
        int classes = ls[ls.length - 1].units;
        cm = new long[classes][classes];
      }
      NeuralNetModel model = new NeuralNetModel(destination_key, sourceKey, frame, ls, nn);
      // Score the model on the training set
      Errors e = eval(train, trainResp, score_training, valid == null ? cm : null);
      e.score_training = score_training == 0 ? train[0].length() : score_training;
      trainErrors = Utils.append(trainErrors, e);
      model.unstable |= Double.isNaN(e.mean_square) || Double.isNaN(e.cross_entropy);
      model.training_errors = trainErrors;
      // Score the model on the validation set
      if (valid != null) {
        e = eval(valid, validResp, score_validation, cm);
        e.score_validation = score_validation == 0 ? valid[0].length() : score_validation;
        validErrors = Utils.append(validErrors, e);
        model.unstable |= Double.isNaN(e.mean_square) || Double.isNaN(e.cross_entropy);
      }
      model.validation_errors = validErrors;
      model.confusion_matrix = cm;
      model.update(self());
      // Terminate model building if we detect that the model is unstable
      if (model.unstable)
        NeuralNet.running = false;
      return e.training_samples;
    }

    private Errors eval(Vec[] vecs, Vec resp, long n, long[][] cm) {
      Errors e = NeuralNet.eval(ls, vecs, resp, n, cm);
      e.training_samples = trainer.processed();
      e.training_time_ms = runTimeMs();
      return e;
    }
  };
  trainer.start();
  monitor.start();
  trainer.join();
  // Gracefully terminate the job submitted via the H2O web API
  if (mode != MapReduce) {
    // Tell the monitor thread to finish too
    running = false;
    try {
      monitor.join();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  } else {
    while (running) {
      // MapReduce will inform us that running = false
      try {
        Thread.sleep(1);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }
  // Remove this job -> stop the H2O interface from refreshing
  H2OCountedCompleter task = _fjtask;
  if (task != null)
    task.tryComplete();
  this.remove();
}
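Note that the epochs argument of the trainers is a double, which is how the warmup phase converts a sample budget into a fraction of an epoch. With illustrative numbers (MNIST's training set has 60,000 rows):

long warmup_samples = 10000;                               // hypothetical warmup budget
long num_rows = 60000;                                     // MNIST training rows
double warmupEpochs = (double) warmup_samples / num_rows;  // ~0.167 epochs trained serially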
Use of hex.Layer in project h2o-2 by h2oai.
Class NeuralNetMnist, method execImpl:
@Override
protected void execImpl() {
  Frame trainf = TestUtil.parseFromH2OFolder("smalldata/mnist/train.csv.gz");
  Frame testf = TestUtil.parseFromH2OFolder("smalldata/mnist/test.csv.gz");
  train = trainf.vecs();
  test = testf.vecs();
  // Labels are in the last column for this dataset
  final Vec trainLabels = train[train.length - 1];
  train = Utils.remove(train, train.length - 1);
  final Vec testLabels = test[test.length - 1];
  test = Utils.remove(test, test.length - 1);
  final Layer[] ls = build(train, trainLabels, null, null);
  // Monitor training
  final Timer timer = new Timer();
  final long start = System.nanoTime();
  final AtomicInteger evals = new AtomicInteger(1);
  timer.schedule(new TimerTask() {
    @Override
    public void run() {
      if (!Job.isRunning(self()))
        timer.cancel();
      else {
        double time = (System.nanoTime() - start) / 1e9;
        Trainer trainer = _trainer;
        long processed = trainer == null ? 0 : trainer.processed();
        int ps = (int) (processed / time);
        String text = (int) time + "s, " + processed + " samples (" + ps + "/s) ";
        // Build a separate net for scoring purposes, using the same normalization stats as for training
        Layer[] temp = build(train, trainLabels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
        Layer.shareWeights(ls, temp);
        // Estimate the training error on a subset of the dataset for speed
        Errors e = NeuralNet.eval(temp, 1000, null);
        text += "train: " + e;
        text += ", rate: ";
        text += String.format("%.5g", ls[0].rate(processed));
        text += ", momentum: ";
        text += String.format("%.5g", ls[0].momentum(processed));
        System.out.println(text);
        if ((evals.incrementAndGet() % 1) == 0) { // % 1: compute the test error on every pass
          System.out.println("Computing test error");
          temp = build(test, testLabels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
          Layer.shareWeights(ls, temp);
          e = NeuralNet.eval(temp, 0, null);
          System.out.println("Test error: " + e);
        }
      }
    }
  }, 0, 10);
  startTraining(ls);
}
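The scoring trick in the timer task is worth noting: a second network with identical topology is built for evaluation, and Layer.shareWeights points it at the live training weights, so scoring never copies parameters. A condensed helper capturing that pattern (a sketch using only calls that appear in the snippet above; the method name is hypothetical) might read:

private Errors scoreOn(Vec[] data, Vec labels, Layer[] ls, long n) {
  // Build a parallel net that reuses the training net's normalization stats
  Layer[] temp = build(data, labels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
  Layer.shareWeights(ls, temp);         // temp now reads the live training weights
  return NeuralNet.eval(temp, n, null); // score n rows (0 = all), no confusion matrix
}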