Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnistPretrain, method build.
@Override
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
Layer[] ls = new Layer[4];
ls[0] = new VecsInput(data, inputStats);
// ls[1] = new Layer.RectifierDropout(1024);
// ls[2] = new Layer.RectifierDropout(1024);
ls[1] = new Layer.Tanh(50);
ls[2] = new Layer.Tanh(50);
ls[3] = new VecSoftmax(labels, outputStats);
// Parameters for MNIST run
NeuralNet p = new NeuralNet();
// only used for the NN run after pretraining
p.rate = 0.01;
p.activation = NeuralNet.Activation.Tanh;
p.loss = NeuralNet.Loss.CrossEntropy;
// p.rate_annealing = 1e-6f;
// p.max_w2 = 15;
// p.momentum_start = 0.5f;
// p.momentum_ramp = 60000 * 300;
// p.momentum_stable = 0.99f;
// p.l1 = .00001f;
// p.l2 = .00f;
p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
return ls;
}
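Every build() override shown on this page follows the same shape: a VecsInput over the feature Vecs, one or more hidden layers, a VecSoftmax over the label Vec, and a final loop that wires each layer into the topology with init(ls, i, p). Below is a minimal sketch of that pattern, parameterized by hidden-layer sizes; the helper name and the choice of Layer.Tanh for the hidden type are illustrative, not part of h2o-2.

// Illustrative helper, not part of the project: assembles the topology shape
// shared by the build() methods on this page, using Tanh hidden layers.
protected Layer[] buildTopology(Vec[] data, Vec labels, VecsInput inputStats,
                                VecSoftmax outputStats, int[] hiddenSizes, NeuralNet p) {
    Layer[] ls = new Layer[hiddenSizes.length + 2];
    ls[0] = new VecsInput(data, inputStats);                  // input layer over the feature Vecs
    for (int i = 0; i < hiddenSizes.length; i++)
        ls[i + 1] = new Layer.Tanh(hiddenSizes[i]);           // hidden layers
    ls[ls.length - 1] = new VecSoftmax(labels, outputStats);  // softmax output over the label Vec
    for (int i = 0; i < ls.length; i++)
        ls[i].init(ls, i, p);                                 // wire each layer into the topology
    return ls;
}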
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnist, method build.
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
// same parameters as in test_NN_mnist.py
Layer[] ls = new Layer[5];
ls[0] = new VecsInput(data, inputStats);
ls[1] = new Layer.RectifierDropout(117);
ls[2] = new Layer.RectifierDropout(131);
ls[3] = new Layer.RectifierDropout(129);
ls[ls.length - 1] = new VecSoftmax(labels, outputStats);
NeuralNet p = new NeuralNet();
p.seed = 98037452452L;
p.rate = 0.005;
p.rate_annealing = 1e-6;
p.activation = NeuralNet.Activation.RectifierWithDropout;
p.loss = NeuralNet.Loss.CrossEntropy;
p.input_dropout_ratio = 0.2;
p.max_w2 = 15;
p.epochs = 2;
p.l1 = 1e-5;
p.l2 = 0.0000001;
p.momentum_start = 0.5;
p.momentum_ramp = 100000;
p.momentum_stable = 0.99;
p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
p.classification = true;
p.diagnostics = true;
p.expert_mode = true;
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
return ls;
}
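The monitoring code further down queries ls[0].rate(processed) and ls[0].momentum(processed), so both the learning rate and the momentum set above are schedules over the number of training samples seen. The sketch below illustrates the schedules these parameters conventionally describe (inverse-time annealing for the rate, a linear ramp from momentum_start to momentum_stable over momentum_ramp samples); the actual formulas live inside Layer, so treat this as an illustration of the parameter meanings, not the project's implementation.

// Illustrative only: conventional reading of the schedule parameters above.
static double annealedRate(double rate, double rate_annealing, long samplesProcessed) {
    // the effective rate decays smoothly as more samples are processed
    return rate / (1 + rate_annealing * samplesProcessed);
}

static double rampedMomentum(double momentum_start, double momentum_stable,
                             double momentum_ramp, long samplesProcessed) {
    // momentum rises linearly from momentum_start to momentum_stable over momentum_ramp samples
    if (samplesProcessed >= momentum_ramp) return momentum_stable;
    return momentum_start + (momentum_stable - momentum_start) * samplesProcessed / momentum_ramp;
}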
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnist, method execImpl.
@Override
protected void execImpl() {
Frame trainf = TestUtil.parseFromH2OFolder("smalldata/mnist/train.csv.gz");
Frame testf = TestUtil.parseFromH2OFolder("smalldata/mnist/test.csv.gz");
train = trainf.vecs();
test = testf.vecs();
// Labels are in the last column for this dataset
final Vec trainLabels = train[train.length - 1];
train = Utils.remove(train, train.length - 1);
final Vec testLabels = test[test.length - 1];
test = Utils.remove(test, test.length - 1);
final Layer[] ls = build(train, trainLabels, null, null);
// Monitor training
final Timer timer = new Timer();
final long start = System.nanoTime();
final AtomicInteger evals = new AtomicInteger(1);
timer.schedule(new TimerTask() {
@Override
public void run() {
if (!Job.isRunning(self()))
timer.cancel();
else {
double time = (System.nanoTime() - start) / 1e9;
Trainer trainer = _trainer;
long processed = trainer == null ? 0 : trainer.processed();
int ps = (int) (processed / time);
String text = (int) time + "s, " + processed + " samples (" + (ps) + "/s) ";
// Build separate nets for scoring purposes, use same normalization stats as for training
Layer[] temp = build(train, trainLabels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
Layer.shareWeights(ls, temp);
// Estimate training error on subset of dataset for speed
Errors e = NeuralNet.eval(temp, 1000, null);
text += "train: " + e;
text += ", rate: ";
text += String.format("%.5g", ls[0].rate(processed));
text += ", momentum: ";
text += String.format("%.5g", ls[0].momentum(processed));
System.out.println(text);
if ((evals.incrementAndGet() % 1) == 0) {
System.out.println("Computing test error");
temp = build(test, testLabels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
Layer.shareWeights(ls, temp);
e = NeuralNet.eval(temp, 0, null);
System.out.println("Test error: " + e);
}
}
}
}, 0, 10);
startTraining(ls);
}
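The scoring trick inside the TimerTask (building a throwaway net that reuses the training net's input/output statistics, then sharing weights with Layer.shareWeights) can be read as one reusable step. Here is a sketch using only the calls already shown in this method; the helper name and its rowsToSample parameter are illustrative.

// Illustrative helper: score the current weights on a given data set without
// touching the training net. The temporary topology reuses the training net's
// normalization stats and points at the same weights via shareWeights.
private NeuralNet.Errors score(Layer[] ls, Vec[] data, Vec labels, long rowsToSample) {
    Layer[] temp = build(data, labels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
    Layer.shareWeights(ls, temp);
    // rowsToSample limits scoring to a subset for speed; per the test-error call
    // above, 0 appears to mean "score all rows"
    return NeuralNet.eval(temp, rowsToSample, null);
}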
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetMnistDrednet, method build.
@Override
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
NeuralNet p = new NeuralNet();
Layer[] ls = new Layer[5];
p.hidden = new int[] { 1024, 1024, 2048 };
// p.hidden = new int[]{128,128,256};
ls[0] = new VecsInput(data, inputStats);
for (int i = 1; i < ls.length - 1; i++) ls[i] = new Layer.RectifierDropout(p.hidden[i - 1]);
ls[4] = new VecSoftmax(labels, outputStats);
p.rate = 0.01f;
p.rate_annealing = 1e-6f;
p.epochs = 1000;
p.activation = NeuralNet.Activation.RectifierWithDropout;
p.input_dropout_ratio = 0.2;
p.loss = NeuralNet.Loss.CrossEntropy;
p.max_w2 = 15;
p.momentum_start = 0.5f;
p.momentum_ramp = 1800000;
p.momentum_stable = 0.99f;
p.score_training = 1000;
p.score_validation = 10000;
p.l1 = .00001f;
p.l2 = .00f;
p.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
p.score_interval = 30;
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
return ls;
}
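Note that the hidden layout comes from p.hidden while the Layer[] size stays hard-coded at 5, which only matches as long as p.hidden holds exactly three hidden layers. Below is a sketch of the same construction with the array size derived from p.hidden, so the two cannot drift apart; this is an illustrative rearrangement, not a change to the project code.

// Illustrative: derive the topology size from p.hidden so the two stay in sync.
p.hidden = new int[] { 1024, 1024, 2048 };
Layer[] ls = new Layer[p.hidden.length + 2];
ls[0] = new VecsInput(data, inputStats);
for (int i = 1; i < ls.length - 1; i++)
    ls[i] = new Layer.RectifierDropout(p.hidden[i - 1]);
ls[ls.length - 1] = new VecSoftmax(labels, outputStats);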
Use of hex.Layer.VecSoftmax in project h2o-2 by h2oai.
The class NeuralNetIrisTest, method compare.
@Test
public void compare() throws Exception {
// Testing different things
// Note: the Microsoft reference implementation only covers Tanh + MSE; Rectifier and MCE are implemented by 0xdata (trivial).
// Note: Initial weight distributions are copied, but what is tested is the stability behavior.
NeuralNet.Activation[] activations = { NeuralNet.Activation.Tanh, NeuralNet.Activation.Rectifier };
Loss[] losses = { NeuralNet.Loss.MeanSquare, NeuralNet.Loss.CrossEntropy };
NeuralNet.InitialWeightDistribution[] dists = { NeuralNet.InitialWeightDistribution.Normal, //NeuralNet.InitialWeightDistribution.Uniform,
NeuralNet.InitialWeightDistribution.UniformAdaptive };
double[] initial_weight_scales = { 0.0258 };
double[] holdout_ratios = { 0.8 };
double[] epochs = { 1, 13 * 17 };
double[] rates = { 0.01 };
NeuralNet.ExecutionMode[] trainers = { NeuralNet.ExecutionMode.SingleThread };
final long seed0 = 0xDECAF;
int count = 0;
int hogwild_runs = 0;
int hogwild_errors = 0;
for (NeuralNet.ExecutionMode trainer : trainers) {
for (NeuralNet.Activation activation : activations) {
for (Loss loss : losses) {
for (NeuralNet.InitialWeightDistribution dist : dists) {
for (double scale : initial_weight_scales) {
for (double holdout_ratio : holdout_ratios) {
for (double epoch : epochs) {
for (double rate : rates) {
Log.info("");
Log.info("STARTING.");
Log.info("Running in " + trainer.name() + " mode with " + activation.name() + " activation function and " + loss.name() + " loss function.");
Log.info("Initialization with " + dist.name() + " distribution and " + scale + " scale, holdout ratio " + holdout_ratio);
NeuralNetMLPReference ref = new NeuralNetMLPReference();
final long seed = seed0 + count;
Log.info("Using seed " + seed);
ref.init(activation, water.util.Utils.getDeterRNG(seed), holdout_ratio);
// Parse Iris and shuffle the same way as ref
Key file = NFSFileVec.make(find_test_file(PATH));
Frame frame = ParseDataset2.parse(Key.make(), new Key[] { file });
double[][] rows = new double[(int) frame.numRows()][frame.numCols()];
for (int c = 0; c < frame.numCols(); c++) for (int r = 0; r < frame.numRows(); r++) rows[r][c] = frame.vecs()[c].at(r);
Random rand = water.util.Utils.getDeterRNG(seed);
for (int i = rows.length - 1; i >= 0; i--) {
int shuffle = rand.nextInt(i + 1);
double[] row = rows[shuffle];
rows[shuffle] = rows[i];
rows[i] = row;
}
int limit = (int) (frame.numRows() * holdout_ratio);
_train = frame(null, Utils.subarray(rows, 0, limit));
_test = frame(null, Utils.subarray(rows, limit, (int) frame.numRows() - limit));
Vec[] data = Utils.remove(_train.vecs(), _train.vecs().length - 1);
Vec labels = _train.vecs()[_train.vecs().length - 1];
NeuralNet p = new NeuralNet();
p.seed = seed;
p.rate = rate;
p.activation = activation;
p.max_w2 = Double.MAX_VALUE;
p.epochs = epoch;
p.input_dropout_ratio = 0;
p.rate_annealing = 0;
p.l1 = 0;
p.l2 = 0;
p.momentum_start = 0;
p.momentum_ramp = 0;
p.momentum_stable = 0;
p.initial_weight_distribution = dist;
p.initial_weight_scale = scale;
p.diagnostics = true;
p.fast_mode = false;
p.loss = loss;
Layer[] ls = new Layer[3];
ls[0] = new VecsInput(data, null);
if (activation == NeuralNet.Activation.Tanh) {
ls[1] = new Tanh(7);
} else if (activation == NeuralNet.Activation.TanhWithDropout) {
ls[1] = new Layer.TanhDropout(7);
} else if (activation == NeuralNet.Activation.Rectifier) {
ls[1] = new Rectifier(7);
} else if (activation == NeuralNet.Activation.RectifierWithDropout) {
ls[1] = new Layer.RectifierDropout(7);
}
ls[2] = new VecSoftmax(labels, null);
for (int i = 0; i < ls.length; i++) {
ls[i].init(ls, i, p);
}
// use the same random weights for the reference implementation
Layer l = ls[1];
for (int o = 0; o < l._a.length; o++) {
for (int i = 0; i < l._previous._a.length; i++) {
// System.out.println("initial weight[" + o + "]=" + l._w[o * l._previous._a.length + i]);
ref._nn.ihWeights[i][o] = l._w[o * l._previous._a.length + i];
}
ref._nn.hBiases[o] = l._b[o];
// System.out.println("initial bias[" + o + "]=" + l._b[o]);
}
l = ls[2];
for (int o = 0; o < l._a.length; o++) {
for (int i = 0; i < l._previous._a.length; i++) {
// System.out.println("initial weight[" + o + "]=" + l._w[o * l._previous._a.length + i]);
ref._nn.hoWeights[i][o] = l._w[o * l._previous._a.length + i];
}
ref._nn.oBiases[o] = l._b[o];
// System.out.println("initial bias[" + o + "]=" + l._b[o]);
}
// Reference
ref.train((int) p.epochs, p.rate, loss);
// H2O
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
new Trainer.Direct(ls, p.epochs, null).run();
} else if (trainer == NeuralNet.ExecutionMode.SingleNode) {
new Trainer.Threaded(ls, p.epochs, null, -1).run();
} else {
new Trainer.MapReduce(ls, p.epochs, null).run();
}
// tiny absolute and relative tolerances for single threaded mode
double abseps = 1e-4;
// relative error check only triggers if abs(a-b) > abseps
double releps = 1e-4;
double weight_mse = 0;
// Make sure weights are equal
l = ls[1];
for (int o = 0; o < l._a.length; o++) {
for (int i = 0; i < l._previous._a.length; i++) {
double a = ref._nn.ihWeights[i][o];
double b = l._w[o * l._previous._a.length + i];
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
compareVal(a, b, abseps, releps);
// System.out.println("weight[" + o + "]=" + b);
} else {
weight_mse += (a - b) * (a - b);
}
}
}
weight_mse /= l._a.length * l._previous._a.length;
// Make sure output layer (predictions) are equal
for (int o = 0; o < ls[2]._a.length; o++) {
double a = ref._nn.outputs[o];
double b = ls[2]._a[o];
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
compareVal(a, b, abseps, releps);
}
}
// Make sure overall classification accuracy is equal
NeuralNet.Errors train = NeuralNet.eval(ls, 0, null);
data = Utils.remove(_test.vecs(), _test.vecs().length - 1);
labels = _test.vecs()[_test.vecs().length - 1];
VecsInput input = (VecsInput) ls[0];
input.vecs = data;
input._len = data[0].length();
((VecSoftmax) ls[2]).vec = labels;
NeuralNet.Errors test = NeuralNet.eval(ls, 0, null);
double trainAcc = ref._nn.Accuracy(ref._trainData);
double testAcc = ref._nn.Accuracy(ref._testData);
if (trainer == NeuralNet.ExecutionMode.SingleThread) {
compareVal(trainAcc, train.classification, abseps, releps);
compareVal(testAcc, test.classification, abseps, releps);
Log.info("DONE. Single-threaded mode shows exact agreement with reference results.");
} else {
final boolean hogwild_error = (trainAcc != train.classification || testAcc != test.classification);
Log.info("DONE. " + (hogwild_error ? "Threaded mode resulted in errors due to Hogwild." : ""));
Log.info("MSE of Hogwild H2O weights: " + weight_mse + ".");
hogwild_errors += hogwild_error ? 1 : 0;
}
Log.info("H2O training error : " + train.classification * 100 + "%, test error: " + test.classification * 100 + "%" + (trainAcc != train.classification || testAcc != test.classification ? " HOGWILD! " : ""));
Log.info("REF training error : " + trainAcc * 100 + "%, test error: " + testAcc * 100 + "%");
frame.delete();
for (Layer l1 : ls) l1.close();
_train.delete();
_test.delete();
if (trainer != NeuralNet.ExecutionMode.SingleThread) {
hogwild_runs++;
}
count++;
}
}
}
}
}
}
}
}
Log.info("===============================================================");
Log.info("Number of differences due to Hogwild: " + hogwild_errors + " (out of " + hogwild_runs + " runs).");
Log.info("===============================================================");
}
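All of the agreement checks above go through compareVal(a, b, abseps, releps); per the comments on the tolerances, the relative check only applies once the absolute difference already exceeds abseps. The sketch below shows that kind of mixed absolute/relative comparison; the body is an assumption reconstructed from those comments, not the project's actual compareVal.

// Illustrative tolerance check: pass on absolute tolerance first, fall back to a
// relative tolerance only when the absolute difference already exceeds abseps.
static void compareVal(double a, double b, double abseps, double releps) {
    double absdiff = Math.abs(a - b);
    if (absdiff <= abseps) return;  // within absolute tolerance
    // one common definition of relative difference; the project's may differ
    double reldiff = absdiff / Math.max(Math.abs(a), Math.abs(b));
    if (reldiff > releps)
        throw new AssertionError("values differ: " + a + " vs " + b
            + " (abs diff " + absdiff + ", rel diff " + reldiff + ")");
}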