Example 1 with RnnOutputLayer

Use of org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer in the deeplearning4j project.

From the class OutputLayerTest, the method testRnnOutputLayerIncEdgeCases. It checks that an OutputLayer fed via an RnnToFeedForwardPreProcessor and an RnnOutputLayer produce equivalent scores and correctly shaped outputs, including the edge cases timeSeriesLength == 1 and miniBatchSize == 1.

@Test
public void testRnnOutputLayerIncEdgeCases() {
    //Basic test + test edge cases: timeSeriesLength==1, miniBatchSize==1, both
    int[] tsLength = { 5, 1, 5, 1 };
    int[] miniBatch = { 7, 7, 1, 1 };
    int nIn = 3;
    int nOut = 6;
    int layerSize = 4;
    FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
    for (int t = 0; t < tsLength.length; t++) {
        Nd4j.getRandom().setSeed(12345);
        int timeSeriesLength = tsLength[t];
        int miniBatchSize = miniBatch[t];
        Random r = new Random(12345L);
        INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < nIn; j++) {
                for (int k = 0; k < timeSeriesLength; k++) {
                    input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
                }
            }
        }
        INDArray labels3d = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength); //one-hot labels for each example and time step
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                int idx = r.nextInt(nOut);
                labels3d.putScalar(new int[] { i, idx, j }, 1.0f);
            }
        }
        INDArray labels2d = proc.backprop(labels3d, miniBatchSize); //reshape [mb, nOut, tsLength] -> [mb*tsLength, nOut]
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list()
                .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .activation(Activation.TANH).updater(Updater.NONE).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .updater(Updater.NONE).build())
                .inputPreProcessor(1, new RnnToFeedForwardPreProcessor())
                .pretrain(false).backprop(true).build();
        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();
        INDArray out2d = mln.feedForward(input).get(2); //index 2: activations of layer 1, the output layer
        INDArray out3d = proc.preProcess(out2d, miniBatchSize); //reshape [mb*tsLength, nOut] -> [mb, nOut, tsLength]
        MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list()
                .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .activation(Activation.TANH).updater(Updater.NONE).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .updater(Updater.NONE).build())
                .pretrain(false).backprop(true).build();
        MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
        mlnRnn.init();
        INDArray outRnn = mlnRnn.feedForward(input).get(2);
        mln.setLabels(labels2d);
        mlnRnn.setLabels(labels3d);
        mln.computeGradientAndScore();
        mlnRnn.computeGradientAndScore();
        //score is average over all examples.
        //However: OutputLayer version has miniBatch*timeSeriesLength "examples" (after reshaping)
        //RnnOutputLayer has miniBatch examples
        //Hence: expect difference in scores by factor of timeSeriesLength
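        //Worked example of the factor (illustrative numbers): with miniBatchSize==7 and timeSeriesLength==5,
        //the reshaped OutputLayer averages the summed loss over 7*5 = 35 "examples", while RnnOutputLayer
        //averages the same sum over 7 examples, so mln.score() * 5 should match mlnRnn.score().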
        double score = mln.score() * timeSeriesLength;
        double scoreRNN = mlnRnn.score();
        assertFalse(Double.isNaN(score));
        assertFalse(Double.isNaN(scoreRNN));
        double relError = Math.abs(score - scoreRNN) / (Math.abs(score) + Math.abs(scoreRNN));
        System.out.println(relError);
        assertTrue(relError < 1e-6);
        //Check labels and inputs for output layer:
        OutputLayer ol = (OutputLayer) mln.getOutputLayer();
        assertArrayEquals(ol.getInput().shape(), new int[] { miniBatchSize * timeSeriesLength, layerSize });
        assertArrayEquals(ol.getLabels().shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
        RnnOutputLayer rnnol = (RnnOutputLayer) mlnRnn.getOutputLayer();
        //assertArrayEquals(rnnol.getInput().shape(),new int[]{miniBatchSize,layerSize,timeSeriesLength});
        //Input may be set by BaseLayer methods. Thus input may end up as reshaped 2d version instead of original 3d version.
        //Not ideal, but everything else works.
        assertArrayEquals(rnnol.getLabels().shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
        //Check shapes of output for both:
        assertArrayEquals(out2d.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
        INDArray out = mln.output(input);
        assertArrayEquals(out.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
        INDArray act = mln.activate();
        assertArrayEquals(act.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
        INDArray preout = mln.preOutput(input);
        assertArrayEquals(preout.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
        INDArray outFFRnn = mlnRnn.feedForward(input).get(2);
        assertArrayEquals(outFFRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
        INDArray outRnn2 = mlnRnn.output(input);
        assertArrayEquals(outRnn2.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
        INDArray actRnn = mlnRnn.activate();
        assertArrayEquals(actRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
        INDArray preoutRnn = mlnRnn.preOutput(input);
        assertArrayEquals(preoutRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
    }
}
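
The equivalence checked above hinges on how FeedForwardToRnnPreProcessor reshapes between the 2d shape [miniBatchSize * timeSeriesLength, nOut] and the 3d shape [miniBatchSize, nOut, timeSeriesLength]. A minimal round-trip sketch using the same preprocessor calls as the test (the array contents and sizes here are illustrative, not from the test):

FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
INDArray flat = Nd4j.rand(7 * 5, 6);        //2d activations: [miniBatch * tsLength, nOut]
INDArray seq = proc.preProcess(flat, 7);    //to 3d: [7, 6, 5]
INDArray back = proc.backprop(seq, 7);      //back to 2d: [35, 6], same values and ordering as flat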
Also used (full imports for this example):

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.util.Random;

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.layers.OutputLayer;
import org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

Example 2 with RnnOutputLayer

Use of org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer in the deeplearning4j project.

From the class OutputLayerTest, the method testOutputLayersRnnForwardPass. It verifies that a standard OutputLayer after a recurrent layer produces 2d activations, while an RnnOutputLayer keeps them 3d.

@Test
public void testOutputLayersRnnForwardPass() {
    //Test standard OutputLayer with RNN input: expect all outputs, activations etc. to be 2d
    int nIn = 2;
    int nOut = 5;
    int layerSize = 4;
    int timeSeriesLength = 6;
    int miniBatchSize = 3;
    Random r = new Random(12345L);
    INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < nIn; j++) {
            for (int k = 0; k < timeSeriesLength; k++) {
                input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
            }
        }
    }
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list()
            .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .activation(Activation.TANH).updater(Updater.NONE).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .updater(Updater.NONE).build())
            .inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).build();
    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    mln.init();
    INDArray out2d = mln.feedForward(input).get(2);
    assertArrayEquals(out2d.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
    INDArray out = mln.output(input);
    assertArrayEquals(out.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
    INDArray act = mln.activate();
    assertArrayEquals(act.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
    INDArray preout = mln.preOutput(input);
    assertArrayEquals(preout.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
    //As above, but for RnnOutputLayer. Expect all activations etc. to be 3d
    MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list()
            .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .activation(Activation.TANH).updater(Updater.NONE).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .updater(Updater.NONE).build())
            .build();
    MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
    mlnRnn.init();
    INDArray out3d = mlnRnn.feedForward(input).get(2);
    assertArrayEquals(out3d.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
    INDArray outRnn = mlnRnn.output(input);
    assertArrayEquals(outRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
    INDArray actRnn = mlnRnn.activate();
    assertArrayEquals(actRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
    INDArray preoutRnn = mlnRnn.preOutput(input);
    assertArrayEquals(preoutRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
}
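
Because RnnOutputLayer preserves the time dimension, callers can slice individual time steps out of the 3d output. A minimal sketch, assuming the mlnRnn network and input from the example above plus org.nd4j.linalg.indexing.NDArrayIndex; extracting the final step is just an illustration:

INDArray outRnn3d = mlnRnn.output(input);               //[miniBatchSize, nOut, timeSeriesLength]
INDArray lastStep = outRnn3d.get(NDArrayIndex.all(),    //all examples
        NDArrayIndex.all(),                             //all output units
        NDArrayIndex.point(timeSeriesLength - 1));      //final time step -> [miniBatchSize, nOut]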
Also used (full imports for this example):

import static org.junit.Assert.assertArrayEquals;

import java.util.Random;

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
