Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class ComputationGraphTestRNN, method testRnnTimeStepMultipleInOut.
@Test
public void testRnnTimeStepMultipleInOut() {
    // Test rnnTimeStep() with multiple inputs and outputs; also exercises the
    // RNN <-> feed-forward preprocessors. Architecture:
    //   in0 -> lstm0 (nOut=6) \
    //                          -> dense (nOut=9) -> out0 (nOut=3)
    //   in1 -> lstm1 (nOut=5) /                  -> out1 (nOut=4)
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in0", "in1")
            .addLayer("lstm0",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(6)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "in0")
            .addLayer("lstm1",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(4).nOut(5)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "in1")
            .addLayer("dense",
                    new DenseLayer.Builder().nIn(6 + 5).nOut(9)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "lstm0", "lstm1")
            // Fix: the original chained .weightInit(WeightInit.DISTRIBUTION) twice on each
            // output layer; the redundant first call is removed (same value, last call wins).
            .addLayer("out0",
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(9).nOut(3)
                            .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "dense")
            .addLayer("out1",
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(9).nOut(4)
                            .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "dense")
            .setOutputs("out0", "out1")
            .inputPreProcessor("dense", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("out0", new FeedForwardToRnnPreProcessor())
            .inputPreProcessor("out1", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true).build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    INDArray input0 = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
    INDArray input1 = Nd4j.rand(new int[] { 3, 4, timeSeriesLength });

    // Reference: activations from one full-length forward pass over the whole series.
    Map<String, INDArray> allOutputActivations =
            graph.feedForward(new INDArray[] { input0, input1 }, true);
    INDArray fullActLSTM0 = allOutputActivations.get("lstm0");
    INDArray fullActLSTM1 = allOutputActivations.get("lstm1");
    INDArray fullActOut0 = allOutputActivations.get("out0");
    INDArray fullActOut1 = allOutputActivations.get("out1");
    assertArrayEquals(new int[] { 3, 6, timeSeriesLength }, fullActLSTM0.shape());
    assertArrayEquals(new int[] { 3, 5, timeSeriesLength }, fullActLSTM1.shape());
    assertArrayEquals(new int[] { 3, 3, timeSeriesLength }, fullActOut0.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullActOut1.shape());

    // rnnTimeStep must match the full forward pass regardless of how the series is chunked.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        int nSteps = timeSeriesLength / inLength; // nSteps chunks, each of length inLength
        graph.rnnClearPreviousState();
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset0 = input0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertEquals(inLength, inputSubset0.size(2));
            INDArray inputSubset1 = input1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertEquals(inLength, inputSubset1.size(2));

            INDArray[] outArr = graph.rnnTimeStep(inputSubset0, inputSubset1);
            assertEquals(2, outArr.length);
            INDArray out0 = outArr[0];
            INDArray out1 = outArr[1];

            // Expected output: the matching time slice of the full forward-pass activations.
            // The inLength==1 branch copies into a fresh 3d array so shapes compare exactly.
            INDArray expOutSubset0;
            if (inLength == 1) {
                int[] sizes = new int[] { fullActOut0.size(0), fullActOut0.size(1), 1 };
                expOutSubset0 = Nd4j.create(sizes);
                expOutSubset0.tensorAlongDimension(0, 1, 0).assign(fullActOut0.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset0 = fullActOut0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            INDArray expOutSubset1;
            if (inLength == 1) {
                int[] sizes = new int[] { fullActOut1.size(0), fullActOut1.size(1), 1 };
                expOutSubset1 = Nd4j.create(sizes);
                expOutSubset1.tensorAlongDimension(0, 1, 0).assign(fullActOut1.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset1 = fullActOut1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset0, out0);
            assertEquals(expOutSubset1, out1);

            // The stored RNN state after the step must equal the activations at the
            // last time step processed so far (from the reference forward pass).
            Map<String, INDArray> currLSTM0State = graph.rnnGetPreviousState("lstm0");
            Map<String, INDArray> currLSTM1State = graph.rnnGetPreviousState("lstm1");
            INDArray lastActL0 = currLSTM0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currLSTM1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullActLSTM0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullActLSTM1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class MultiLayerTestRNN, method testGravesTLSTMInitStacked.
@Test
public void testGravesTLSTMInitStacked() {
    int nIn = 8;
    int nOut = 25;
    // Hidden layer sizes. The configuration below is derived from this array so the
    // layer definitions and the per-layer assertions cannot drift apart (the original
    // hard-coded 17/19/23 again inside the builder).
    int[] nHiddenUnits = { 17, 19, 23 };
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(nHiddenUnits[0])
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nHiddenUnits[0]).nOut(nHiddenUnits[1])
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(2, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nHiddenUnits[1]).nOut(nHiddenUnits[2])
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(3, new RnnOutputLayer.Builder(LossFunction.MSE)
                    .nIn(nHiddenUnits[2]).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    // Ensure that each recurrent layer has the correct number of weights and biases,
    // each with the expected shape.
    for (int i = 0; i < nHiddenUnits.length; i++) {
        Layer layer = network.getLayer(i);
        assertTrue(layer instanceof GravesLSTM);
        Map<String, INDArray> paramTable = layer.paramTable();
        // 2 sets of weights (input + recurrent), 1 set of biases
        assertEquals(3, paramTable.size());
        int layerNIn = (i == 0 ? nIn : nHiddenUnits[i - 1]);
        INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
        // Recurrent weights: shape [layerSize, 4*layerSize+3]
        assertArrayEquals(new int[] { nHiddenUnits[i], 4 * nHiddenUnits[i] + 3 },
                recurrentWeights.shape());
        INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
        // Input weights: shape [nIn, 4*layerSize]
        assertArrayEquals(new int[] { layerNIn, 4 * nHiddenUnits[i] }, inputWeights.shape());
        INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
        // Biases: shape [1, 4*layerSize]
        assertArrayEquals(new int[] { 1, 4 * nHiddenUnits[i] }, biases.shape());
        // Forget gate biases should be initialized to > 0; see the parameter initializer.
        INDArray forgetGateBiases = biases.get(NDArrayIndex.point(0),
                NDArrayIndex.interval(nHiddenUnits[i], 2 * nHiddenUnits[i]));
        assertEquals(nHiddenUnits[i], (int) forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0));
        // The layer's reported parameter count must equal the sum of its parts.
        int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
        assertEquals(nParams, layer.numParams());
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class ComputationGraphTestRNN, method testRnnTimeStepGravesLSTM.
@Test
public void testRnnTimeStepGravesLSTM() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;
    // 4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer; hence also tests the
    // RNN <-> feed-forward preprocessors.
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "in")
            .addLayer("1",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "0")
            .addLayer("2",
                    new DenseLayer.Builder().nIn(8).nOut(9)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "1")
            // Fix: the original chained .weightInit(WeightInit.DISTRIBUTION) twice on this
            // layer; the redundant call is removed (same value, last call wins).
            .addLayer("3",
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(9).nOut(4)
                            .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "2")
            .setOutputs("3")
            .inputPreProcessor("2", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("3", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true).build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });

    // Reference: activations from one full-length forward pass over the whole series.
    Map<String, INDArray> allOutputActivations = graph.feedForward(input, true);
    INDArray fullOutL0 = allOutputActivations.get("0");
    INDArray fullOutL1 = allOutputActivations.get("1");
    INDArray fullOutL3 = allOutputActivations.get("3");
    assertArrayEquals(new int[] { 3, 7, timeSeriesLength }, fullOutL0.shape());
    assertArrayEquals(new int[] { 3, 8, timeSeriesLength }, fullOutL1.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullOutL3.shape());

    // rnnTimeStep must match the full forward pass regardless of how the series is chunked.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        int nSteps = timeSeriesLength / inLength; // nSteps chunks, each of length inLength
        graph.rnnClearPreviousState();
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertEquals(inLength, inputSubset.size(2));

            INDArray[] outArr = graph.rnnTimeStep(inputSubset);
            assertEquals(1, outArr.length);
            INDArray out = outArr[0];

            // Expected output: the matching time slice of the full forward-pass activations.
            // The inLength==1 branch copies into a fresh 3d array so shapes compare exactly.
            INDArray expOutSubset;
            if (inLength == 1) {
                int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
                expOutSubset = Nd4j.create(sizes);
                expOutSubset.tensorAlongDimension(0, 1, 0).assign(fullOutL3.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset, out);

            // The stored RNN state after the step must equal the last-time-step activations.
            Map<String, INDArray> currL0State = graph.rnnGetPreviousState("0");
            Map<String, INDArray> currL1State = graph.rnnGetPreviousState("1");
            INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class MultiLayerTestRNN, method testRnnTimeStepGravesLSTM.
@Test
public void testRnnTimeStepGravesLSTM() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;
    // 4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer; hence also tests the
    // RNN <-> feed-forward preprocessors.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            .layer(2, new DenseLayer.Builder().nIn(8).nOut(9)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            // Fix: the original chained .weightInit(WeightInit.DISTRIBUTION) twice on this
            // layer; the redundant call is removed (same value, last call wins).
            .layer(3, new RnnOutputLayer.Builder(LossFunction.MCXENT).nIn(9).nOut(4)
                    .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            .inputPreProcessor(2, new RnnToFeedForwardPreProcessor())
            .inputPreProcessor(3, new FeedForwardToRnnPreProcessor())
            .build();
    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    // BUG FIX: the original constructed the network but never initialized it; every sibling
    // test calls init() immediately after construction to allocate parameters.
    mln.init();

    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });

    // Reference: activations from one full-length forward pass over the whole series.
    // feedForward returns [input, L0 act, L1 act, L2 act, L3 act]; index accordingly.
    List<INDArray> allOutputActivations = mln.feedForward(input, true);
    INDArray fullOutL0 = allOutputActivations.get(1);
    INDArray fullOutL1 = allOutputActivations.get(2);
    INDArray fullOutL3 = allOutputActivations.get(4);

    // rnnTimeStep must match the full forward pass regardless of how the series is chunked.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        int nSteps = timeSeriesLength / inLength; // nSteps chunks, each of length inLength
        mln.rnnClearPreviousState();
        // Reset; should be set (again) by the rnnTimeStep method itself.
        mln.setInputMiniBatchSize(1);
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset;
            if (inLength == 1) {
                // Workaround to an nd4j bug: build the single-time-step 3d array explicitly.
                int[] sizes = new int[] { input.size(0), input.size(1), 1 };
                inputSubset = Nd4j.create(sizes);
                inputSubset.tensorAlongDimension(0, 1, 0).assign(input.get(NDArrayIndex.all(),
                        NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            if (inLength > 1)
                assertEquals(inLength, inputSubset.size(2));

            INDArray out = mln.rnnTimeStep(inputSubset);

            // Expected output: the matching time slice of the full forward-pass activations.
            INDArray expOutSubset;
            if (inLength == 1) {
                int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
                expOutSubset = Nd4j.create(sizes);
                expOutSubset.tensorAlongDimension(0, 1, 0).assign(fullOutL3.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset, out);

            // The stored RNN state after the step must equal the last-time-step activations.
            Map<String, INDArray> currL0State = mln.rnnGetPreviousState(0);
            Map<String, INDArray> currL1State = mln.rnnGetPreviousState(1);
            INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class MultiLayerTestRNN, method testGravesLSTMInit.
@Test
public void testGravesLSTMInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(1, new RnnOutputLayer.Builder(LossFunction.MSE)
                    .nIn(nHiddenUnits).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    // Ensure that we have the correct number of weights and biases, each with the
    // expected shape.
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GravesLSTM);
    Map<String, INDArray> paramTable = layer.paramTable();
    // 2 sets of weights (input + recurrent), 1 set of biases
    assertEquals(3, paramTable.size());
    INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    // Recurrent weights: shape [layerSize, 4*layerSize+3]
    assertArrayEquals(new int[] { nHiddenUnits, 4 * nHiddenUnits + 3 }, recurrentWeights.shape());
    INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    // Input weights: shape [nIn, 4*layerSize]
    assertArrayEquals(new int[] { nIn, 4 * nHiddenUnits }, inputWeights.shape());
    INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
    // Biases: shape [1, 4*layerSize]
    assertArrayEquals(new int[] { 1, 4 * nHiddenUnits }, biases.shape());
    // Forget gate biases should be initialized to > 0; see the parameter initializer
    // for details.
    INDArray forgetGateBiases = biases.get(NDArrayIndex.point(0),
            NDArrayIndex.interval(nHiddenUnits, 2 * nHiddenUnits));
    assertEquals(nHiddenUnits, (int) forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0));
    // The layer's reported parameter count must equal the sum of its parts.
    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertEquals(nParams, layer.numParams());
}
Aggregations