Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnistPretrain, method build.
@Override
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
Layer[] ls = new Layer[4];
ls[0] = new VecsInput(data, inputStats);
// ls[1] = new Layer.RectifierDropout(1024);
// ls[2] = new Layer.RectifierDropout(1024);
ls[1] = new Layer.Tanh(50);
ls[2] = new Layer.Tanh(50);
ls[3] = new VecSoftmax(labels, outputStats);
// Parameters for MNIST run
NeuralNet p = new NeuralNet();
// only used for the NN run after pretraining
p.rate = 0.01;
p.activation = NeuralNet.Activation.Tanh;
p.loss = NeuralNet.Loss.CrossEntropy;
// p.rate_annealing = 1e-6f;
// p.max_w2 = 15;
// p.momentum_start = 0.5f;
// p.momentum_ramp = 60000 * 300;
// p.momentum_stable = 0.99f;
// p.l1 = .00001f;
// p.l2 = .00f;
p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
return ls;
}
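Every build() override shown on this page follows the same shape: a VecsInput over the feature Vecs, one or more hidden layers, a VecSoftmax over the label Vec, and a final loop that wires each layer into the topology with init(ls, i, p). Below is a minimal sketch of that pattern, parameterized by hidden-layer sizes; the helper name and the choice of Layer.Tanh for the hidden type are illustrative, not part of h2o-2.

// Illustrative helper, not part of the project: assembles the topology shape
// shared by the build() methods on this page, using Tanh hidden layers.
protected Layer[] buildTopology(Vec[] data, Vec labels, VecsInput inputStats,
                                VecSoftmax outputStats, int[] hiddenSizes, NeuralNet p) {
    Layer[] ls = new Layer[hiddenSizes.length + 2];
    ls[0] = new VecsInput(data, inputStats);                  // input layer over the feature Vecs
    for (int i = 0; i < hiddenSizes.length; i++)
        ls[i + 1] = new Layer.Tanh(hiddenSizes[i]);           // hidden layers
    ls[ls.length - 1] = new VecSoftmax(labels, outputStats);  // softmax output over the label Vec
    for (int i = 0; i < ls.length; i++)
        ls[i].init(ls, i, p);                                 // wire each layer into the topology
    return ls;
}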
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnist, method build.
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
// same parameters as in test_NN_mnist.py
Layer[] ls = new Layer[5];
ls[0] = new VecsInput(data, inputStats);
ls[1] = new Layer.RectifierDropout(117);
ls[2] = new Layer.RectifierDropout(131);
ls[3] = new Layer.RectifierDropout(129);
ls[ls.length - 1] = new VecSoftmax(labels, outputStats);
NeuralNet p = new NeuralNet();
p.seed = 98037452452L;
p.rate = 0.005;
p.rate_annealing = 1e-6;
p.activation = NeuralNet.Activation.RectifierWithDropout;
p.loss = NeuralNet.Loss.CrossEntropy;
p.input_dropout_ratio = 0.2;
p.max_w2 = 15;
p.epochs = 2;
p.l1 = 1e-5;
p.l2 = 0.0000001;
p.momentum_start = 0.5;
p.momentum_ramp = 100000;
p.momentum_stable = 0.99;
p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
p.classification = true;
p.diagnostics = true;
p.expert_mode = true;
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
return ls;
}
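The monitoring code further down queries ls[0].rate(processed) and ls[0].momentum(processed), so both the learning rate and the momentum set above are schedules over the number of training samples seen. The sketch below illustrates the schedules these parameters conventionally describe (inverse-time annealing for the rate, a linear ramp from momentum_start to momentum_stable over momentum_ramp samples); the actual formulas live inside Layer, so treat this as an illustration of the parameter meanings, not the project's implementation.

// Illustrative only: conventional reading of the schedule parameters above.
static double annealedRate(double rate, double rate_annealing, long samplesProcessed) {
    // the effective rate decays smoothly as more samples are processed
    return rate / (1 + rate_annealing * samplesProcessed);
}

static double rampedMomentum(double momentum_start, double momentum_stable,
                             double momentum_ramp, long samplesProcessed) {
    // momentum rises linearly from momentum_start to momentum_stable over momentum_ramp samples
    if (samplesProcessed >= momentum_ramp) return momentum_stable;
    return momentum_start + (momentum_stable - momentum_start) * samplesProcessed / momentum_ramp;
}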
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnist, method execImpl.
@Override
protected void execImpl() {
Frame trainf = TestUtil.parseFromH2OFolder("smalldata/mnist/train.csv.gz");
Frame testf = TestUtil.parseFromH2OFolder("smalldata/mnist/test.csv.gz");
train = trainf.vecs();
test = testf.vecs();
// Labels are in the last column for this dataset
final Vec trainLabels = train[train.length - 1];
train = Utils.remove(train, train.length - 1);
final Vec testLabels = test[test.length - 1];
test = Utils.remove(test, test.length - 1);
final Layer[] ls = build(train, trainLabels, null, null);
// Monitor training
final Timer timer = new Timer();
final long start = System.nanoTime();
final AtomicInteger evals = new AtomicInteger(1);
timer.schedule(new TimerTask() {
@Override
public void run() {
if (!Job.isRunning(self()))
timer.cancel();
else {
double time = (System.nanoTime() - start) / 1e9;
Trainer trainer = _trainer;
long processed = trainer == null ? 0 : trainer.processed();
int ps = (int) (processed / time);
String text = (int) time + "s, " + processed + " samples (" + (ps) + "/s) ";
// Build separate nets for scoring purposes, use same normalization stats as for training
Layer[] temp = build(train, trainLabels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
Layer.shareWeights(ls, temp);
// Estimate training error on subset of dataset for speed
Errors e = NeuralNet.eval(temp, 1000, null);
text += "train: " + e;
text += ", rate: ";
text += String.format("%.5g", ls[0].rate(processed));
text += ", momentum: ";
text += String.format("%.5g", ls[0].momentum(processed));
System.out.println(text);
if ((evals.incrementAndGet() % 1) == 0) {
System.out.println("Computing test error");
temp = build(test, testLabels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
Layer.shareWeights(ls, temp);
e = NeuralNet.eval(temp, 0, null);
System.out.println("Test error: " + e);
}
}
}
}, 0, 10);
startTraining(ls);
}
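The scoring trick inside the TimerTask (building a throwaway net that reuses the training net's input/output statistics, then sharing weights with Layer.shareWeights) can be read as one reusable step. Here is a sketch using only the calls already shown in this method; the helper name and its rowsToSample parameter are illustrative.

// Illustrative helper: score the current weights on a given data set without
// touching the training net. The temporary topology reuses the training net's
// normalization stats and points at the same weights via shareWeights.
private NeuralNet.Errors score(Layer[] ls, Vec[] data, Vec labels, long rowsToSample) {
    Layer[] temp = build(data, labels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
    Layer.shareWeights(ls, temp);
    // rowsToSample limits scoring to a subset for speed; per the test-error call
    // above, 0 appears to mean "score all rows"
    return NeuralNet.eval(temp, rowsToSample, null);
}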
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnistDrednet, method build.
@Override
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
NeuralNet p = new NeuralNet();
Layer[] ls = new Layer[5];
p.hidden = new int[] { 1024, 1024, 2048 };
// p.hidden = new int[]{128,128,256};
ls[0] = new VecsInput(data, inputStats);
for (int i = 1; i < ls.length - 1; i++) ls[i] = new Layer.RectifierDropout(p.hidden[i - 1]);
ls[4] = new VecSoftmax(labels, outputStats);
p.rate = 0.01f;
p.rate_annealing = 1e-6f;
p.epochs = 1000;
p.activation = NeuralNet.Activation.RectifierWithDropout;
p.input_dropout_ratio = 0.2;
p.loss = NeuralNet.Loss.CrossEntropy;
p.max_w2 = 15;
p.momentum_start = 0.5f;
p.momentum_ramp = 1800000;
p.momentum_stable = 0.99f;
p.score_training = 1000;
p.score_validation = 10000;
p.l1 = .00001f;
p.l2 = .00f;
p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
p.score_interval = 30;
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
return ls;
}
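Note that the hidden layout comes from p.hidden while the Layer[] size stays hard-coded at 5, which only matches as long as p.hidden holds exactly three hidden layers. Below is a sketch of the same construction with the array size derived from p.hidden, so the two cannot drift apart; this is an illustrative rearrangement, not a change to the project code.

// Illustrative: derive the topology size from p.hidden so the two stay in sync.
p.hidden = new int[] { 1024, 1024, 2048 };
Layer[] ls = new Layer[p.hidden.length + 2];
ls[0] = new VecsInput(data, inputStats);
for (int i = 1; i < ls.length - 1; i++)
    ls[i] = new Layer.RectifierDropout(p.hidden[i - 1]);
ls[ls.length - 1] = new VecSoftmax(labels, outputStats);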
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetIrisTest, method compare.
@Test
public void compare() throws Exception {
// Testing different things
// Note: the Microsoft reference implementation only covers Tanh + MSE; Rectifier and MCE are implemented by 0xdata (trivial).
// Note: Initial weight distributions are copied, but what is tested is the stability behavior.
NeuralNet.Activation[] activations = { NeuralNet.Activation.Tanh, NeuralNet.Activation.Rectifier };
Loss[] losses = { NeuralNet.Loss.MeanSquare, NeuralNet.Loss.CrossEntropy };
NeuralNet.InitialWeightDistribution[] dists = { NeuralNet.InitialWeightDistribution.Normal, //NeuralNet.InitialWeightDistribution.Uniform,
NeuralNet.InitialWeightDistribution.UniformAdaptive };
double[] initial_weight_scales = { 0.0258 };
double[] holdout_ratios = { 0.8 };
double[] epochs = { 1, 13 * 17 };
double[] rates = { 0.01 };
NeuralNet.ExecutionMode[] trainers = { NeuralNet.ExecutionMode.SingleThread };
final long seed0 = 0xDECAF;
int count = 0;
int hogwild_runs = 0;
int hogwild_errors = 0;
for (NeuralNet.ExecutionMode trainer : trainers) {
for (NeuralNet.Activation activation : activations) {
for (Loss loss : losses) {
for (NeuralNet.InitialWeightDistribution dist : dists) {
for (double scale : initial_weight_scales) {
for (double holdout_ratio : holdout_ratios) {
for (double epoch : epochs) {
for (double rate : rates) {
Log.info("");
Log.info("STARTING.");
Log.info("Running in " + trainer.name() + " mode with " + activation.name() + " activation function and " + loss.name() + " loss function.");
Log.info("Initialization with " + dist.name() + " distribution and " + scale + " scale, holdout ratio " + holdout_ratio);
NeuralNetMLPReference ref = new NeuralNetMLPReference();
final long seed = seed0 + count;
Log.info("Using seed " + seed);
ref.init(activation, water.util.Utils.getDeterRNG(seed), holdout_ratio);
// Parse Iris and shuffle the same way as ref
Key file = NFSFileVec.make(find_test_file(PATH));
Frame frame = ParseDataset2.parse(Key.make(), new Key[] { file });
double[][] rows = new double[(int) frame.numRows()][frame.numCols()];
for (int c = 0; c < frame.numCols(); c++) for (int r = 0; r < frame.numRows(); r++) rows[r][c] = frame.vecs()[c].at(r);
Random rand = water.util.Utils.getDeterRNG(seed);
for (int i = rows.length - 1; i >= 0; i--) {
int shuffle = rand.nextInt(i + 1);
double[] row = rows[shuffle];
rows[shuffle] = rows[i];
rows[i] = row;
}
int limit = (int) (frame.numRows() * holdout_ratio);
_train = frame(null, Utils.subarray(rows, 0, limit));
_test = frame(null, Utils.subarray(rows, limit, (int) frame.numRows() - limit));
Vec[] data = Utils.remove(_train.vecs(), _train.vecs().length - 1);
Vec labels = _train.vecs()[_train.vecs().length - 1];
NeuralNet p = new NeuralNet();
p.seed = seed;
p.rate = rate;
p.activation = activation;
p.max_w2 = Double.MAX_VALUE;
p.epochs = epoch;
p.input_dropout_ratio = 0;
p.rate_annealing = 0;
p.l1 = 0;
p.l2 = 0;
p.momentum_start = 0;
p.momentum_ramp = 0;
p.momentum_stable = 0;
p.initial_weight_distribution = dist;
p.initial_weight_scale = scale;
p.diagnostics = true;
p.fast_mode = false;
p.loss = loss;
Layer[] ls = new Layer[3];
ls[0] = new VecsInput(data, null);
if (activation == NeuralNet.Activation.Tanh) {
ls[1] = new Tanh(7);
} else if (activation == NeuralNet.Activation.TanhWithDropout) {
ls[1] = new Layer.TanhDropout(7);
} else if (activation == NeuralNet.Activation.Rectifier) {
ls[1] = new Rectifier(7);
} else if (activation == NeuralNet.Activation.RectifierWithDropout) {
ls[1] = new Layer.RectifierDropout(7);
}
ls[2] = new VecSoftmax(labels, null);
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
// use the same random weights for the reference implementation
Layer l = ls[1];
for (int o = 0; o < l._a.length; o++) {
for (int i = 0; i < l._previous._a.length; i++) {
// System.out.println("initial weight[" + o + "]=" + l._w[o * l._previous._a.length + i]);
ref._nn.ihWeights[i][o] = l._w[o * l._previous._a.length + i];
}
ref._nn.hBiases[o] = l._b[o];
// System.out.println("initial bias[" + o + "]=" + l._b[o]);
}
l = ls[2];
for (int o = 0; o < l._a.length; o++) {
for (int i = 0; i < l._previous._a.length; i++) {
// System.out.println("initial weight[" + o + "]=" + l._w[o * l._previous._a.length + i]);
ref._nn.hoWeights[i][o] = l._w[o * l._previous._a.length + i];
}
ref._nn.oBiases[o] = l._b[o];
// System.out.println("initial bias[" + o + "]=" + l._b[o]);
}
// Reference
ref.train((int) p.epochs, p.rate, loss);
// H2O
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
new Trainer.Direct(ls, p.epochs, null).run();
} else if (trainer == NeuralNet.ExecutionMode.SingleNode) {
new Trainer.Threaded(ls, p.epochs, null, -1).run();
} else {
new Trainer.MapReduce(ls, p.epochs, null).run();
}
// tiny absolute and relative tolerances for single threaded mode
double abseps = 1e-4;
// relative error check only triggers if abs(a-b) > abseps
double releps = 1e-4;
double weight_mse = 0;
// Make sure weights are equal
l = ls[1];
for (int o = 0; o < l._a.length; o++) {
for (int i = 0; i < l._previous._a.length; i++) {
double a = ref._nn.ihWeights[i][o];
double b = l._w[o * l._previous._a.length + i];
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
compareVal(a, b, abseps, releps);
// System.out.println("weight[" + o + "]=" + b);
} else {
weight_mse += (a - b) * (a - b);
}
}
}
weight_mse /= l._a.length * l._previous._a.length;
// Make sure output layer (predictions) are equal
for (int o = 0; o < ls[2]._a.length; o++) {
double a = ref._nn.outputs[o];
double b = ls[2]._a[o];
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
compareVal(a, b, abseps, releps);
}
}
// Make sure overall classification accuracy is equal
NeuralNet.Errors train = NeuralNet.eval(ls, 0, null);
data = Utils.remove(_test.vecs(), _test.vecs().length - 1);
labels = _test.vecs()[_test.vecs().length - 1];
VecsInput input = (VecsInput) ls[0];
input.vecs = data;
input._len = data[0].length();
((VecSoftmax) ls[2]).vec = labels;
NeuralNet.Errors test = NeuralNet.eval(ls, 0, null);
double trainAcc = ref._nn.Accuracy(ref._trainData);
double testAcc = ref._nn.Accuracy(ref._testData);
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
compareVal(trainAcc, train.classification, abseps, releps);
compareVal(testAcc, test.classification, abseps, releps);
Log.info("DONE. Single-threaded mode shows exact agreement with reference results.");
} else {
final boolean hogwild_error = (trainAcc != train.classification || testAcc != test.classification);
Log.info("DONE. " + (hogwild_error ? "Threaded mode resulted in errors due to Hogwild." : ""));
Log.info("MSE of Hogwild H2O weights: " + weight_mse + ".");
hogwild_errors += hogwild_error ? 1 : 0;
}
Log.info("H2O training error : " + train.classification * 100 + "%, test error: " + test.classification * 100 + "%" + (trainAcc != train.classification || testAcc != test.classification ? " HOGWILD! " : ""));
Log.info("REF training error : " + trainAcc * 100 + "%, test error: " + testAcc * 100 + "%");
frame.delete();
for (Layer l1 : ls) l1.close();
_train.delete();
_test.delete();
if (trainer != NeuralNet.ExecutionMode.SingleThread) {
hogwild_runs++;
}
count++;
}
}
}
}
}
}
}
}
Log.info("===============================================================");
Log.info("Number of differences due to Hogwild: " + hogwild_errors + " (out of " + hogwild_runs + " runs).");
Log.info("===============================================================");
}
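All of the agreement checks above go through compareVal(a, b, abseps, releps); per the comments on the tolerances, the relative check only applies once the absolute difference already exceeds abseps. The sketch below shows that kind of mixed absolute/relative comparison; the body is an assumption reconstructed from those comments, not the project's actual compareVal.

// Illustrative tolerance check: pass on absolute tolerance first, fall back to a
// relative tolerance only when the absolute difference already exceeds abseps.
static void compareVal(double a, double b, double abseps, double releps) {
    double absdiff = Math.abs(a - b);
    if (absdiff <= abseps) return;  // within absolute tolerance
    // one common definition of relative difference; the project's may differ
    double reldiff = absdiff / Math.max(Math.abs(a), Math.abs(b));
    if (reldiff > releps)
        throw new AssertionError("values differ: " + a + " vs " + b
            + " (abs diff " + absdiff + ", rel diff " + reldiff + ")");
}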