Search in sources:

Example 1 with Layer

use of hex.Layer in project h2o-2 by h2oai.

Method preTrain of the class NeuralNetMnistPretrain.

/**
 * Pre-trains the layer at position {@code index} as an auto-encoder.
 * <p>
 * A temporary network is assembled: a fresh input layer, one Tanh layer for
 * every layer of {@code ls} strictly between the input and {@code index}
 * (learning rate forced to 0), and on top a Tanh / TanhPrime pair whose
 * weights are shared with {@code ls[index]}. The network is trained with a
 * {@code Trainer.Direct} while a Swing window shows an {@code MnistCanvas}
 * built on that trainer. The call returns once the trainer has been joined.
 *
 * @param ls     the full network whose layer {@code index} is pre-trained
 * @param index  position in {@code ls} of the layer to pre-train
 * @param epochs value handed to {@code Trainer.Direct} as the epoch count
 */
private final void preTrain(Layer[] ls, int index, int epochs) {
    // Slots [0, index) mirror ls; slots index and index + 1 hold the auto-encoder pair
    final Layer[] stack = new Layer[index + 2];
    final VecsInput source = (VecsInput) ls[0];
    stack[0] = new VecsInput(source.vecs, source);
    // initialise the new input layer from the original input layer's parameters
    stack[0].init(stack, 0, ls[0].params);

    int layer = 1;
    while (layer < index) {
        final Layer frozen = new Layer.Tanh(ls[layer].units);
        stack[layer] = frozen;
        Layer.shareWeights(ls[layer], frozen);
        // initialise from the matching original layer's parameters
        frozen.init(stack, layer, ls[layer].params);
        // a zero learning rate turns training off for the lower layers
        frozen.params.rate = 0;
        layer++;
    }

    // Auto-encoder: a forward layer ...
    final Layer encoder = new Layer.Tanh(ls[index].units);
    stack[index] = encoder;
    encoder.init(stack, index, ls[index].params);
    encoder.params.rate = 1e-5;

    // ... and a reverse layer on top of it, sized like the layer below 'index'
    final Layer decoder = new Layer.TanhPrime(ls[index - 1].units);
    stack[index + 1] = decoder;
    decoder.init(stack, index + 1, encoder.params);
    decoder.params.rate = 1e-5;

    // Both halves of the auto-encoder work on the weights of ls[index]
    Layer.shareWeights(ls[index], encoder);
    Layer.shareWeights(ls[index], decoder);

    _trainer = new Trainer.Direct(stack, epochs, self());

    // Basic visualization of images and weights
    final JFrame window = new JFrame("H2O Pre-Training");
    window.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
    window.setContentPane(new MnistCanvas(_trainer).init());
    window.pack();
    window.setLocationRelativeTo(null);
    window.setVisible(true);

    _trainer.start();
    _trainer.join();
}
Also used : MnistCanvas(hex.MnistCanvas) VecsInput(hex.Layer.VecsInput) Trainer(hex.Trainer) Layer(hex.Layer)

Example 2 with Layer

use of hex.Layer in project h2o-2 by h2oai.

Method build of the class NeuralNetMnistPretrain.

/**
 * Assembles and initialises the four-layer network used for the
 * pre-training run: input, two Tanh layers of 50 units each, and a softmax
 * output.
 *
 * @param data        input columns passed to the {@code VecsInput} layer
 * @param labels      label column passed to the {@code VecSoftmax} layer
 * @param inputStats  second constructor argument of the input layer
 * @param outputStats second constructor argument of the output layer
 * @return the initialised layers, input first
 */
@Override
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
    // Topology: input -> Tanh(50) -> Tanh(50) -> softmax
    // (RectifierDropout(1024) hidden layers were tried previously and are not used here)
    final Layer[] net = {
        new VecsInput(data, inputStats),
        new Layer.Tanh(50),
        new Layer.Tanh(50),
        new VecSoftmax(labels, outputStats)
    };

    // Parameters for MNIST run. Annealing, max_w2, momentum and l1/l2 are
    // deliberately not set in this method.
    final NeuralNet params = new NeuralNet();
    // the rate is only used for the NN run after pretraining
    params.rate = 0.01;
    params.activation = NeuralNet.Activation.Tanh;
    params.loss = NeuralNet.Loss.CrossEntropy;
    params.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;

    // Every layer receives the whole array, its own position and the shared parameters
    int position = 0;
    for (Layer layer : net) {
        layer.init(net, position, params);
        position++;
    }
    return net;
}
Also used : VecSoftmax(hex.Layer.VecSoftmax) NeuralNet(hex.NeuralNet) VecsInput(hex.Layer.VecsInput) Layer(hex.Layer)

Example 3 with Layer

use of hex.Layer in project h2o-2 by h2oai.

Method build of the class NeuralNetMnist.

/**
 * Assembles and initialises the five-layer MNIST network: input, three
 * RectifierDropout layers (117, 131 and 129 units) and a softmax output.
 * The hyper-parameters are the same as in test_NN_mnist.py.
 *
 * @param data        input columns passed to the {@code VecsInput} layer
 * @param labels      label column passed to the {@code VecSoftmax} layer
 * @param inputStats  second constructor argument of the input layer
 * @param outputStats second constructor argument of the output layer
 * @return the initialised layers, input first
 */
protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
    final Layer[] net = {
        new VecsInput(data, inputStats),
        new Layer.RectifierDropout(117),
        new Layer.RectifierDropout(131),
        new Layer.RectifierDropout(129),
        new VecSoftmax(labels, outputStats)
    };

    final NeuralNet params = new NeuralNet();
    params.seed = 98037452452L;

    // learning rate schedule
    params.rate = 0.005;
    params.rate_annealing = 1e-6;

    // model shape and loss
    params.activation = NeuralNet.Activation.RectifierWithDropout;
    params.loss = NeuralNet.Loss.CrossEntropy;
    params.input_dropout_ratio = 0.2;
    params.max_w2 = 15;
    params.epochs = 2;

    // regularisation
    params.l1 = 1e-5;
    params.l2 = 0.0000001;

    // momentum schedule
    params.momentum_start = 0.5;
    params.momentum_ramp = 100000;
    params.momentum_stable = 0.99;

    params.initial_weight_distribution = NeuralNet.InitialWeightDistribution.UniformAdaptive;
    params.classification = true;
    params.diagnostics = true;
    params.expert_mode = true;

    // Every layer receives the whole array, its own position and the shared parameters
    int position = 0;
    for (Layer layer : net) {
        layer.init(net, position, params);
        position++;
    }
    return net;
}
Also used : VecSoftmax(hex.Layer.VecSoftmax) NeuralNet(hex.NeuralNet) VecsInput(hex.Layer.VecsInput) Layer(hex.Layer)

Example 4 with Layer

use of hex.Layer in project h2o-2 by h2oai.

Method startTrain of the class NeuralNet.

/**
 * Builds the network described by this job's fields, trains it, and scores
 * it periodically from a separate monitor thread.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>Create the layer array: a {@code VecsInput}, one hidden layer per
 *       entry of {@code hidden} (type chosen by {@code activation}), and a
 *       {@code VecSoftmax} (classification) or {@code VecLinear} output.</li>
 *   <li>Create a {@code NeuralNetModel}, attach initial error arrays and
 *       call {@code delete_and_lock} on it.</li>
 *   <li>Pick a {@code Trainer} according to {@code mode}, optionally
 *       running a warm-up trainer to completion first.</li>
 *   <li>Start the trainer and the monitor thread, then join the trainer.</li>
 *   <li>Wait for the monitor (non-MapReduce) or for {@code running} to
 *       become false (MapReduce), complete {@code _fjtask} if present, and
 *       call {@code remove()}.</li>
 * </ol>
 *
 * @throws RuntimeException if {@code mode} is none of SingleThread,
 *         SingleNode or MapReduce
 */
void startTrain() {
    logStart();
    running = true;
    // Vec[] vecs = Utils.append(_train, response);
    // reChunk(vecs);
    // final Vec[] train = new Vec[vecs.length - 1];
    // System.arraycopy(vecs, 0, train, 0, train.length);
    // final Vec trainResp = classification ? vecs[vecs.length - 1].toEnum() : vecs[vecs.length - 1];
    final Vec[] train = _train;
    // for classification the response is converted with toEnum()
    final Vec trainResp = classification ? response.toEnum() : response;
    // one input layer + hidden.length hidden layers + one output layer
    final Layer[] ls = new Layer[hidden.length + 2];
    ls[0] = new VecsInput(train, null);
    for (int i = 0; i < hidden.length; i++) {
        // NOTE(review): there is no default branch, so an activation value not
        // listed here would leave ls[i + 1] null - confirm the enum has no other values
        switch(activation) {
            case Tanh:
                ls[i + 1] = new Tanh(hidden[i]);
                break;
            case TanhWithDropout:
                ls[i + 1] = new TanhDropout(hidden[i]);
                break;
            case Rectifier:
                ls[i + 1] = new Rectifier(hidden[i]);
                break;
            case RectifierWithDropout:
                ls[i + 1] = new RectifierDropout(hidden[i]);
                break;
            case Maxout:
                ls[i + 1] = new Maxout(hidden[i]);
                break;
            case MaxoutWithDropout:
                ls[i + 1] = new MaxoutDropout(hidden[i]);
                break;
        }
    }
    // output layer: softmax for classification, linear otherwise
    if (classification)
        ls[ls.length - 1] = new VecSoftmax(trainResp, null);
    else
        ls[ls.length - 1] = new VecLinear(trainResp, null);
    //copy parameters from NeuralNet, and set previous/input layer links
    for (int i = 0; i < ls.length; i++) ls[i].init(ls, i, this);
    final Key sourceKey = Key.make(input("source"));
    // frame of the training columns with the response appended as last column
    final Frame frame = new Frame(_names, train);
    frame.add(_responseName, trainResp);
    // error histories start with a single empty entry; validation history only if validation data is given
    final Errors[] trainErrors0 = new Errors[] { new Errors() };
    final Errors[] validErrors0 = validation == null ? null : new Errors[] { new Errors() };
    NeuralNetModel model = new NeuralNetModel(destination_key, sourceKey, frame, ls, this);
    model.training_errors = trainErrors0;
    model.validation_errors = validErrors0;
    model.delete_and_lock(self());
    // adapted[0] is read for scoring below; adapted[1], if non-null, is deleted when monitoring ends
    final Frame[] adapted = validation == null ? null : model.adapt(validation, false);
    final Trainer trainer;
    final long num_rows = source.numRows();
    if (mode == SingleThread) {
        Log.info("Entering single-threaded execution mode");
        trainer = new Trainer.Direct(ls, epochs, self());
    } else {
        // one node works on the first batch of points serially for improved stability
        if (warmup_samples > 0) {
            Log.info("Training the first " + warmup_samples + " samples in serial for improved stability.");
            // warm-up length is expressed as a fraction of an epoch
            Trainer warmup = new Trainer.Direct(ls, (double) warmup_samples / num_rows, self());
            warmup.start();
            warmup.join();
        //TODO: for MapReduce send weights from master VM to all other VMs
        }
        if (mode == SingleNode) {
            Log.info("Entering single-node (multi-threaded Hogwild) execution mode.");
            trainer = new Trainer.Threaded(ls, epochs, self(), -1);
        } else if (mode == MapReduce) {
            // NOTE(review): the "mode == MapReduce" test below is already implied by the enclosing branch
            if (warmup_samples > 0 && mode == MapReduce) {
                Log.info("Multi-threaded warmup with " + warmup_samples + " samples.");
                Trainer warmup = new Trainer.Threaded(ls, (double) warmup_samples / num_rows, self(), -1);
                warmup.start();
                warmup.join();
            //TODO: for MapReduce send weights from master VM to all other VMs
            }
            Log.info("Entering multi-node (MapReduce + multi-threaded Hogwild) execution mode.");
            trainer = new Trainer.MapReduce(ls, epochs, self());
        } else
            throw new RuntimeException("invalid execution mode.");
    }
    Log.info("Running for " + epochs + " epochs.");
    // alias so the anonymous monitor class can pass this job to the model constructor
    final NeuralNet nn = this;
    // Use a separate thread for monitoring (blocked most of the time)
    Thread monitor = new Thread() {

        // growing score histories; each evaluation appends one entry
        Errors[] trainErrors = trainErrors0, validErrors = validErrors0;

        // Scores the model repeatedly until enough samples were processed or the job stops
        @Override
        public void run() {
            try {
                Vec[] valid = null;
                Vec validResp = null;
                if (validation != null) {
                    assert adapted != null;
                    final Vec[] vs = adapted[0].vecs();
                    // all columns but the last are inputs, the last one is the response
                    valid = Arrays.copyOf(vs, vs.length - 1);
                    // NOTE(review): copies the same leading elements Arrays.copyOf just copied - looks redundant, confirm
                    System.arraycopy(adapted[0].vecs(), 0, valid, 0, valid.length);
                    validResp = vs[vs.length - 1];
                }
                //score the model every 2 seconds (or less often, if it takes longer to score)
                // NOTE(review): the actual interval is score_interval seconds (see below); "2" is presumably its default
                final long num_samples_total = (long) (Math.ceil(num_rows * epochs));
                // num < 0 marks "no evaluation yet", so the first evaluation is never delayed
                long num = -1, last_eval = runTimeMs();
                do {
                    //time between evaluations
                    final long interval = (long) (score_interval * 1000);
                    long time_taken = runTimeMs() - last_eval;
                    if (num >= 0 && time_taken < interval) {
                        Thread.sleep(interval - time_taken);
                    }
                    last_eval = runTimeMs();
                    num = eval(valid, validResp);
                    // stop once the trainer reports the requested number of samples
                    if (num >= num_samples_total)
                        break;
                    if (mode != MapReduce) {
                        if (!isRunning(self()) || !running)
                            break;
                    } else {
                        //MapReduce calls cancel() early, we are waiting for running = false
                        if (!running)
                            break;
                    }
                } while (true);
                // remove validation data
                // (not reached when an exception is thrown above)
                if (adapted != null && adapted[1] != null)
                    adapted[1].delete();
                Log.info("Training finished.");
            } catch (Exception ex) {
                cancel(ex);
            }
        }

        // Scores on training (and, if present, validation) data, publishes an updated
        // model and returns the number of training samples reported by the trainer
        private long eval(Vec[] valid, Vec validResp) {
            long[][] cm = null;
            if (classification) {
                // square confusion matrix sized by the output layer's unit count
                int classes = ls[ls.length - 1].units;
                cm = new long[classes][classes];
            }
            NeuralNetModel model = new NeuralNetModel(destination_key, sourceKey, frame, ls, nn);
            // score model on training set
            // the confusion matrix is filled from training data only when there is no validation set
            Errors e = eval(train, trainResp, score_training, valid == null ? cm : null);
            // score_training == 0 is recorded as the full length of the first training column
            e.score_training = score_training == 0 ? train[0].length() : score_training;
            trainErrors = Utils.append(trainErrors, e);
            model.unstable |= Double.isNaN(e.mean_square) || Double.isNaN(e.cross_entropy);
            model.training_errors = trainErrors;
            // score model on validation set
            if (valid != null) {
                e = eval(valid, validResp, score_validation, cm);
                e.score_validation = score_validation == 0 ? valid[0].length() : score_validation;
                validErrors = Utils.append(validErrors, e);
                model.unstable |= Double.isNaN(e.mean_square) || Double.isNaN(e.cross_entropy);
            }
            model.validation_errors = validErrors;
            model.confusion_matrix = cm;
            model.update(self());
            // terminate model building if we detect that a model is unstable
            if (model.unstable)
                NeuralNet.running = false;
            // e is the most recent Errors (validation if present, else training); both carry trainer.processed()
            return e.training_samples;
        }

        // Scores the given columns and stamps the result with the trainer's progress and the run time
        private Errors eval(Vec[] vecs, Vec resp, long n, long[][] cm) {
            Errors e = NeuralNet.eval(ls, vecs, resp, n, cm);
            e.training_samples = trainer.processed();
            e.training_time_ms = runTimeMs();
            return e;
        }
    };
    trainer.start();
    monitor.start();
    trainer.join();
    // Gracefully terminate the job submitted via H2O web API
    if (mode != MapReduce) {
        //tell the monitor thread to finish too
        running = false;
        try {
            monitor.join();
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is only printed, the thread's interrupt flag is not restored
            e.printStackTrace();
        }
    } else {
        // polls the running flag in 1 ms steps
        while (running) {
            //MapReduce will inform us that running = false
            try {
                Thread.sleep(1);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    // remove this job -> stop H2O interface from refreshing
    H2OCountedCompleter task = _fjtask;
    if (task != null)
        task.tryComplete();
    this.remove();
}
Also used : H2OCountedCompleter(water.H2O.H2OCountedCompleter) Layer(hex.Layer)

Example 5 with Layer

use of hex.Layer in project h2o-2 by h2oai.

Method execImpl of the class NeuralNetMnist.

/**
 * Loads the MNIST train and test files, builds the network, schedules a
 * timer task that prints progress and error estimates, and then starts
 * training.
 * <p>
 * In both files the label is the last column; it is split off and the
 * remaining columns are stored in {@code train} and {@code test}. The timer
 * task is scheduled with delay 0 and period 10 (milliseconds, per
 * {@link java.util.Timer}) and cancels its timer once the job is no longer
 * running.
 */
@Override
protected void execImpl() {
    final Frame trainFrame = TestUtil.parseFromH2OFolder("smalldata/mnist/train.csv.gz");
    final Frame testFrame = TestUtil.parseFromH2OFolder("smalldata/mnist/test.csv.gz");
    train = trainFrame.vecs();
    test = testFrame.vecs();

    // Labels are on last column for this dataset
    final int trainLabelCol = train.length - 1;
    final Vec trainLabels = train[trainLabelCol];
    train = Utils.remove(train, trainLabelCol);
    final int testLabelCol = test.length - 1;
    final Vec testLabels = test[testLabelCol];
    test = Utils.remove(test, testLabelCol);

    final Layer[] ls = build(train, trainLabels, null, null);

    // Monitor training
    final Timer timer = new Timer();
    final long start = System.nanoTime();
    final AtomicInteger evals = new AtomicInteger(1);

    final TimerTask progress = new TimerTask() {

        @Override
        public void run() {
            // Stop reporting as soon as the job has ended
            if (!Job.isRunning(self())) {
                timer.cancel();
                return;
            }

            final double seconds = (System.nanoTime() - start) / 1e9;
            final Trainer current = _trainer;
            final long processed = current == null ? 0 : current.processed();
            final int perSecond = (int) (processed / seconds);

            final StringBuilder line = new StringBuilder();
            line.append((int) seconds).append("s, ");
            line.append(processed).append(" samples (").append(perSecond).append("/s) ");

            // Build separate nets for scoring purposes, use same normalization stats as for training
            final VecsInput inputStats = (VecsInput) ls[0];
            final VecSoftmax outputStats = (VecSoftmax) ls[ls.length - 1];
            Layer[] scoring = build(train, trainLabels, inputStats, outputStats);
            Layer.shareWeights(ls, scoring);

            // Estimate training error on subset of dataset for speed
            Errors errors = NeuralNet.eval(scoring, 1000, null);
            line.append("train: ").append(errors);
            line.append(", rate: ");
            line.append(String.format("%.5g", ls[0].rate(processed)));
            line.append(", momentum: ");
            line.append(String.format("%.5g", ls[0].momentum(processed)));
            System.out.println(line.toString());

            // The modulus is 1, so the test error is computed on every tick
            final boolean scoreTestSet = (evals.incrementAndGet() % 1) == 0;
            if (scoreTestSet) {
                System.out.println("Computing test error");
                scoring = build(test, testLabels, (VecsInput) ls[0], (VecSoftmax) ls[ls.length - 1]);
                Layer.shareWeights(ls, scoring);
                errors = NeuralNet.eval(scoring, 0, null);
                System.out.println("Test error: " + errors);
            }
        }
    };
    timer.schedule(progress, 0, 10);

    startTraining(ls);
}
Also used : Frame(water.fvec.Frame) VecSoftmax(hex.Layer.VecSoftmax) Trainer(hex.Trainer) Layer(hex.Layer) Errors(hex.NeuralNet.Errors) Timer(java.util.Timer) TimerTask(java.util.TimerTask) Vec(water.fvec.Vec) AppendableVec(water.fvec.AppendableVec) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) VecsInput(hex.Layer.VecsInput)

Aggregations

Layer (hex.Layer): 8, VecsInput (hex.Layer.VecsInput): 5, VecSoftmax (hex.Layer.VecSoftmax): 4, NeuralNet (hex.NeuralNet): 3, Trainer (hex.Trainer): 2, MnistCanvas (hex.MnistCanvas): 1, Errors (hex.NeuralNet.Errors): 1, Timer (java.util.Timer): 1, TimerTask (java.util.TimerTask): 1, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1, H2OCountedCompleter (water.H2O.H2OCountedCompleter): 1, AppendableVec (water.fvec.AppendableVec): 1, Frame (water.fvec.Frame): 1, Vec (water.fvec.Vec): 1