Search in sources :

Example 91 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class ComputationGraph method rnnUpdateStateWithTBPTTState.

/**
 * Updates the internal state of RNN layers after a truncated BPTT fit call.
 *
 * <p>For every entry of {@code layers}: a {@link RecurrentLayer} gets its TBPTT state
 * installed as its previous state; otherwise a nested {@link MultiLayerNetwork} is asked
 * to run its own equivalent update. Entries of any other type are left untouched.
 */
protected void rnnUpdateStateWithTBPTTState() {
    for (int idx = 0; idx < layers.length; idx++) {
        // Recurrent layers are checked first, matching the precedence of the two cases.
        if (layers[idx] instanceof RecurrentLayer) {
            RecurrentLayer recurrent = (RecurrentLayer) layers[idx];
            recurrent.rnnSetPreviousState(recurrent.rnnGetTBPTTState());
            continue;
        }
        if (layers[idx] instanceof MultiLayerNetwork) {
            MultiLayerNetwork nested = (MultiLayerNetwork) layers[idx];
            nested.updateRnnStateWithTBPTTState();
        }
    }
}
Also used : MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer)

Example 92 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class DropoutLayerTest method testDropoutLayerWithConvMnist.

/**
 * Checks that dropout configured directly on the output layer ("integrated") is equivalent
 * to the same network with an explicit {@code DropoutLayer} inserted before the output
 * layer ("separate"): after one fit on the same MNIST batch both networks must hold equal
 * parameters, and their activations must agree in both train and test mode.
 *
 * @throws Exception if the MNIST iterator cannot be created
 */
@Test
public void testDropoutLayerWithConvMnist() throws Exception {
    DataSetIterator iter = new MnistDataSetIterator(2, 2);
    DataSet next = iter.next();
    // Variant 1: dropout (0.25) is set on the output layer itself
    MultiLayerConfiguration confIntegrated = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .seed(123)
            .list()
            .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20).activation(Activation.RELU).weightInit(WeightInit.XAVIER).build())
            .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).dropOut(0.25).nOut(10).build())
            .backprop(true)
            .pretrain(false)
            .setInputType(InputType.convolutionalFlat(28, 28, 1))
            .build();
    MultiLayerNetwork netIntegrated = new MultiLayerNetwork(confIntegrated);
    netIntegrated.init();
    netIntegrated.fit(next);
    // Variant 2: same network, but dropout (0.25) is a separate DropoutLayer at index 1
    MultiLayerConfiguration confSeparate = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .seed(123)
            .list()
            .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20).activation(Activation.RELU).weightInit(WeightInit.XAVIER).build())
            .layer(1, new DropoutLayer.Builder(0.25).build())
            .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build())
            .backprop(true)
            .pretrain(false)
            .setInputType(InputType.convolutionalFlat(28, 28, 1))
            .build();
    MultiLayerNetwork netSeparate = new MultiLayerNetwork(confSeparate);
    netSeparate.init();
    netSeparate.fit(next);
    // check parameters: layer 1 of the integrated net corresponds to layer 2 of the separate net
    assertEquals(netIntegrated.getLayer(0).getParam("W"), netSeparate.getLayer(0).getParam("W"));
    assertEquals(netIntegrated.getLayer(0).getParam("b"), netSeparate.getLayer(0).getParam("b"));
    assertEquals(netIntegrated.getLayer(1).getParam("W"), netSeparate.getLayer(2).getParam("W"));
    assertEquals(netIntegrated.getLayer(1).getParam("b"), netSeparate.getLayer(2).getParam("b"));
    // check activations
    netIntegrated.setInput(next.getFeatureMatrix());
    netSeparate.setInput(next.getFeatureMatrix());
    // The RNG is re-seeded before every forward pass so both networks draw the same random values.
    // NOTE(review): indices assume feedForward's list starts with the input at index 0 - confirm.
    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTrainIntegrated = netIntegrated.feedForward(true);
    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTrainSeparate = netSeparate.feedForward(true);
    assertEquals(actTrainIntegrated.get(1), actTrainSeparate.get(1));
    assertEquals(actTrainIntegrated.get(2), actTrainSeparate.get(3));
    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTestIntegrated = netIntegrated.feedForward(false);
    Nd4j.getRandom().setSeed(12345);
    List<INDArray> actTestSeparate = netSeparate.feedForward(false);
    // Test-mode activations are compared against the test-mode pass of the separate network
    // (previously the first comparison used the train-mode list by mistake).
    assertEquals(actTestIntegrated.get(1), actTestSeparate.get(1));
    assertEquals(actTestIntegrated.get(2), actTestSeparate.get(3));
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) DataSet(org.nd4j.linalg.dataset.DataSet) DropoutLayer(org.deeplearning4j.nn.conf.layers.DropoutLayer) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ConvolutionLayer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) Test(org.junit.Test)

Example 93 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class FrozenLayerTest method cloneMLNFrozen.

/**
 * Verifies that cloning a transfer-learning network built with {@code setFeatureExtractor(1)}
 * produces a network with the same JSON configuration and parameters, and that the clone
 * and the source network end up with the same parameters after identical training.
 *
 * <p>The reference for "expected" parameters is a two-layer network initialised with the
 * parameters of layers 2 and 3 and trained on the layer-2 feed-forward output of the
 * original model; layers 0 and 1 are expected to keep their original parameters.
 */
@Test
public void cloneMLNFrozen() {
    DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
    NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder()
            .learningRate(0.1)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.SGD)
            .activation(Activation.IDENTITY);

    // Four-layer base model: 4 -> 3 -> 2 -> 3 -> softmax(3)
    MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.list()
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
            .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build())
            .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build())
            .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
            .build());
    modelToFineTune.init();

    // Entry 2 of the feed-forward list serves as the input of the reference network below
    List<INDArray> activationsToLayer2 = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false);
    INDArray asFrozenFeatures = activationsToLayer2.get(2);

    MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).setFeatureExtractor(1).build();
    MultiLayerNetwork clonedModel = modelNow.clone();

    // The clone must match the source in both configuration (JSON) and parameters
    assertEquals(clonedModel.getLayerWiseConfigurations().toJson(), modelNow.getLayerWiseConfigurations().toJson());
    assertEquals(modelNow.params(), clonedModel.params());

    // Reference network: only the last two layers, starting from the same parameters
    INDArray lastTwoLayerParams = Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params());
    MultiLayerNetwork notFrozen = new MultiLayerNetwork(overallConf.list()
            .layer(0, new DenseLayer.Builder().nIn(2).nOut(3).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
            .build(), lastTwoLayerParams);

    for (int round = 0; round < 5; round++) {
        notFrozen.fit(new DataSet(asFrozenFeatures, randomData.getLabels()));
        modelNow.fit(randomData);
        clonedModel.fit(randomData);
    }

    // Layers 0 and 1 keep their original parameters; the rest must match the reference network
    INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).params(), modelToFineTune.getLayer(1).params(), notFrozen.params());
    assertEquals(expectedParams, modelNow.params());
    assertEquals(expectedParams, clonedModel.params());
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Example 94 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class FrozenLayerTest method testFrozen.

/**
 * A model with a few frozen layers must behave like a model made of only the non-frozen
 * layers that is fed the forward-pass output of the frozen layers.
 *
 * <p>The test compares the outputs of both models before training, trains both five times,
 * and then checks that the full model's parameters equal the original parameters of
 * layers 0 and 1 followed by the trained parameters of the reduced model.
 */
@Test
public void testFrozen() {
    DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
    NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder()
            .learningRate(0.1)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.SGD)
            .activation(Activation.IDENTITY);
    FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().learningRate(0.1).build();

    // Four-layer base model: 4 -> 3 -> 2 -> 3 -> softmax(3)
    MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.clone().list()
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
            .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build())
            .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build())
            .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
            .build());
    modelToFineTune.init();

    // Entry 2 of the feed-forward list is what the reduced model receives as input
    INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2);

    MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune)
            .fineTuneConfiguration(finetune)
            .setFeatureExtractor(1)
            .build();

    // Reduced model: only the last two layers, initialised with their current parameters
    MultiLayerNetwork notFrozen = new MultiLayerNetwork(overallConf.clone().list()
            .layer(0, new DenseLayer.Builder().nIn(2).nOut(3).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
            .build(), Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params()));
    //        assertEquals(modelNow.getLayer(2).conf(), notFrozen.getLayer(0).conf());  //Equal, other than names
    //        assertEquals(modelNow.getLayer(3).conf(), notFrozen.getLayer(1).conf());  //Equal, other than names

    //Check: forward pass
    INDArray outNow = modelNow.output(randomData.getFeatures());
    INDArray outNotFrozen = notFrozen.output(asFrozenFeatures);
    assertEquals(outNow, outNotFrozen);

    int round = 0;
    while (round < 5) {
        notFrozen.fit(new DataSet(asFrozenFeatures, randomData.getLabels()));
        modelNow.fit(randomData);
        round++;
    }

    // Layers 0 and 1 keep their original parameters; the rest must match the reduced model
    INDArray expected = Nd4j.hstack(modelToFineTune.getLayer(0).params(), modelToFineTune.getLayer(1).params(), notFrozen.params());
    assertEquals(expected, modelNow.params());
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) FineTuneConfiguration(org.deeplearning4j.nn.transferlearning.FineTuneConfiguration) TransferLearning(org.deeplearning4j.nn.transferlearning.TransferLearning) Test(org.junit.Test)

Example 95 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class OutputLayerTest method testRnnOutputLayerIncEdgeCases.

/**
 * Compares a GravesLSTM network ending in a plain {@code OutputLayer} (with an
 * RNN-to-feed-forward preprocessor) against the same network ending in an
 * {@code RnnOutputLayer}, for several (time series length, mini-batch size) combinations
 * including the edge cases where either or both are 1.
 *
 * <p>For each combination the test checks that the scores agree (after scaling by the time
 * series length), and that labels, inputs and the various output arrays have the expected
 * 2d shapes for the {@code OutputLayer} variant and 3d shapes for the
 * {@code RnnOutputLayer} variant.
 */
@Test
public void testRnnOutputLayerIncEdgeCases() {
    //Basic test + test edge cases: timeSeriesLength==1, miniBatchSize==1, both
    int[] tsLength = { 5, 1, 5, 1 };
    int[] miniBatch = { 7, 7, 1, 1 };
    int nIn = 3;
    int nOut = 6;
    int layerSize = 4;
    FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
    for (int t = 0; t < tsLength.length; t++) {
        // Fixed seeds per combination keep inputs, labels and initial weights reproducible
        Nd4j.getRandom().setSeed(12345);
        int timeSeriesLength = tsLength[t];
        int miniBatchSize = miniBatch[t];
        Random r = new Random(12345L);
        // Input values are drawn from [-0.5, 0.5)
        INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < nIn; j++) {
                for (int k = 0; k < timeSeriesLength; k++) {
                    input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
                }
            }
        }
        // One-hot labels: a single randomly chosen class per example and time step
        INDArray labels3d = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                int idx = r.nextInt(nOut);
                labels3d.putScalar(new int[] { i, idx, j }, 1.0f);
            }
        }
        // 2d version of the labels for the OutputLayer variant
        INDArray labels2d = proc.backprop(labels3d, miniBatchSize);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list().layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).activation(Activation.TANH).updater(Updater.NONE).build()).layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()).inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).pretrain(false).backprop(true).build();
        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();
        INDArray out2d = mln.feedForward(input).get(2);
        INDArray out3d = proc.preProcess(out2d, miniBatchSize);
        MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list().layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).activation(Activation.TANH).updater(Updater.NONE).build()).layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()).pretrain(false).backprop(true).build();
        MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
        mlnRnn.init();
        // Result is not inspected here; the call is kept so the network state is unchanged
        // relative to the original test sequence before computeGradientAndScore().
        INDArray outRnn = mlnRnn.feedForward(input).get(2);
        mln.setLabels(labels2d);
        mlnRnn.setLabels(labels3d);
        mln.computeGradientAndScore();
        mlnRnn.computeGradientAndScore();
        //score is average over all examples.
        //However: OutputLayer version has miniBatch*timeSeriesLength "examples" (after reshaping)
        //RnnOutputLayer has miniBatch examples
        //Hence: expect difference in scores by factor of timeSeriesLength
        double score = mln.score() * timeSeriesLength;
        double scoreRNN = mlnRnn.score();
        assertTrue(!Double.isNaN(score));
        assertTrue(!Double.isNaN(scoreRNN));
        double relError = Math.abs(score - scoreRNN) / (Math.abs(score) + Math.abs(scoreRNN));
        System.out.println(relError);
        assertTrue(relError < 1e-6);
        // All shape assertions below use JUnit's (expected, actual) argument order so that
        // failure messages label the two arrays correctly.
        //Check labels and inputs for output layer:
        OutputLayer ol = (OutputLayer) mln.getOutputLayer();
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, layerSize }, ol.getInput().shape());
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, ol.getLabels().shape());
        RnnOutputLayer rnnol = (RnnOutputLayer) mlnRnn.getOutputLayer();
        //assertArrayEquals(rnnol.getInput().shape(),new int[]{miniBatchSize,layerSize,timeSeriesLength});
        //Input may be set by BaseLayer methods. Thus input may end up as reshaped 2d version instead of original 3d version.
        //Not ideal, but everything else works.
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, rnnol.getLabels().shape());
        //Check shapes of output for both:
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, out2d.shape());
        INDArray out = mln.output(input);
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, out.shape());
        INDArray act = mln.activate();
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, act.shape());
        INDArray preout = mln.preOutput(input);
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, preout.shape());
        INDArray outFFRnn = mlnRnn.feedForward(input).get(2);
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, outFFRnn.shape());
        INDArray outRnn2 = mlnRnn.output(input);
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, outRnn2.shape());
        INDArray actRnn = mlnRnn.activate();
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, actRnn.shape());
        INDArray preoutRnn = mlnRnn.preOutput(input);
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, preoutRnn.shape());
    }
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer) RnnOutputLayer(org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) GravesLSTM(org.deeplearning4j.nn.conf.layers.GravesLSTM) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Aggregations

MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)326 Test (org.junit.Test)277 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)206 INDArray (org.nd4j.linalg.api.ndarray.INDArray)166 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)111 DataSet (org.nd4j.linalg.dataset.DataSet)91 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)70 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)49 NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution)43 ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener)41 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)40 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)38 Random (java.util.Random)34 MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator)30 ConvolutionLayer (org.deeplearning4j.nn.conf.layers.ConvolutionLayer)28 DL4JException (org.deeplearning4j.exception.DL4JException)20 Layer (org.deeplearning4j.nn.api.Layer)20 ClassPathResource (org.nd4j.linalg.io.ClassPathResource)20 File (java.io.File)19 ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph)19