Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class ComputationGraphTestRNN, method testRnnTimeStepMultipleInOut.
@Test
public void testRnnTimeStepMultipleInOut() {
    // Test rnnTimeStep() with multiple inputs and outputs; also exercises the
    // RNN <-> feed-forward preprocessors. Architecture:
    //   in0 -> lstm0 (nOut=6) \
    //                          -> dense (nOut=9) -> out0 (nOut=3)
    //   in1 -> lstm1 (nOut=5) /                  -> out1 (nOut=4)
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in0", "in1")
            .addLayer("lstm0",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(6)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "in0")
            .addLayer("lstm1",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(4).nOut(5)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "in1")
            .addLayer("dense",
                    new DenseLayer.Builder().nIn(6 + 5).nOut(9)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "lstm0", "lstm1")
            // Fix: the original chained .weightInit(WeightInit.DISTRIBUTION) twice on each
            // output layer; the redundant first call is removed (same value, last call wins).
            .addLayer("out0",
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(9).nOut(3)
                            .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "dense")
            .addLayer("out1",
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(9).nOut(4)
                            .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "dense")
            .setOutputs("out0", "out1")
            .inputPreProcessor("dense", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("out0", new FeedForwardToRnnPreProcessor())
            .inputPreProcessor("out1", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true).build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    INDArray input0 = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
    INDArray input1 = Nd4j.rand(new int[] { 3, 4, timeSeriesLength });

    // Reference: activations from one full-length forward pass over the whole series.
    Map<String, INDArray> allOutputActivations =
            graph.feedForward(new INDArray[] { input0, input1 }, true);
    INDArray fullActLSTM0 = allOutputActivations.get("lstm0");
    INDArray fullActLSTM1 = allOutputActivations.get("lstm1");
    INDArray fullActOut0 = allOutputActivations.get("out0");
    INDArray fullActOut1 = allOutputActivations.get("out1");
    assertArrayEquals(new int[] { 3, 6, timeSeriesLength }, fullActLSTM0.shape());
    assertArrayEquals(new int[] { 3, 5, timeSeriesLength }, fullActLSTM1.shape());
    assertArrayEquals(new int[] { 3, 3, timeSeriesLength }, fullActOut0.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullActOut1.shape());

    // rnnTimeStep must match the full forward pass regardless of how the series is chunked.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        int nSteps = timeSeriesLength / inLength; // nSteps chunks, each of length inLength
        graph.rnnClearPreviousState();
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset0 = input0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertEquals(inLength, inputSubset0.size(2));
            INDArray inputSubset1 = input1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertEquals(inLength, inputSubset1.size(2));

            INDArray[] outArr = graph.rnnTimeStep(inputSubset0, inputSubset1);
            assertEquals(2, outArr.length);
            INDArray out0 = outArr[0];
            INDArray out1 = outArr[1];

            // Expected output: the matching time slice of the full forward-pass activations.
            // The inLength==1 branch copies into a fresh 3d array so shapes compare exactly.
            INDArray expOutSubset0;
            if (inLength == 1) {
                int[] sizes = new int[] { fullActOut0.size(0), fullActOut0.size(1), 1 };
                expOutSubset0 = Nd4j.create(sizes);
                expOutSubset0.tensorAlongDimension(0, 1, 0).assign(fullActOut0.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset0 = fullActOut0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            INDArray expOutSubset1;
            if (inLength == 1) {
                int[] sizes = new int[] { fullActOut1.size(0), fullActOut1.size(1), 1 };
                expOutSubset1 = Nd4j.create(sizes);
                expOutSubset1.tensorAlongDimension(0, 1, 0).assign(fullActOut1.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset1 = fullActOut1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset0, out0);
            assertEquals(expOutSubset1, out1);

            // The stored RNN state after the step must equal the activations at the
            // last time step processed so far (from the reference forward pass).
            Map<String, INDArray> currLSTM0State = graph.rnnGetPreviousState("lstm0");
            Map<String, INDArray> currLSTM1State = graph.rnnGetPreviousState("lstm1");
            INDArray lastActL0 = currLSTM0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currLSTM1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullActLSTM0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullActLSTM1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class MultiLayerTestRNN, method testGravesTLSTMInitStacked.
@Test
public void testGravesTLSTMInitStacked() {
    int nIn = 8;
    int nOut = 25;
    // Hidden layer sizes. The configuration below is derived from this array so the
    // layer definitions and the per-layer assertions cannot drift apart (the original
    // hard-coded 17/19/23 again inside the builder).
    int[] nHiddenUnits = { 17, 19, 23 };
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(nHiddenUnits[0])
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nHiddenUnits[0]).nOut(nHiddenUnits[1])
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(2, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nHiddenUnits[1]).nOut(nHiddenUnits[2])
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(3, new RnnOutputLayer.Builder(LossFunction.MSE)
                    .nIn(nHiddenUnits[2]).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    // Ensure that each recurrent layer has the correct number of weights and biases,
    // each with the expected shape.
    for (int i = 0; i < nHiddenUnits.length; i++) {
        Layer layer = network.getLayer(i);
        assertTrue(layer instanceof GravesLSTM);
        Map<String, INDArray> paramTable = layer.paramTable();
        // 2 sets of weights (input + recurrent), 1 set of biases
        assertEquals(3, paramTable.size());
        int layerNIn = (i == 0 ? nIn : nHiddenUnits[i - 1]);
        INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
        // Recurrent weights: shape [layerSize, 4*layerSize+3]
        assertArrayEquals(new int[] { nHiddenUnits[i], 4 * nHiddenUnits[i] + 3 },
                recurrentWeights.shape());
        INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
        // Input weights: shape [nIn, 4*layerSize]
        assertArrayEquals(new int[] { layerNIn, 4 * nHiddenUnits[i] }, inputWeights.shape());
        INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
        // Biases: shape [1, 4*layerSize]
        assertArrayEquals(new int[] { 1, 4 * nHiddenUnits[i] }, biases.shape());
        // Forget gate biases should be initialized to > 0; see the parameter initializer.
        INDArray forgetGateBiases = biases.get(NDArrayIndex.point(0),
                NDArrayIndex.interval(nHiddenUnits[i], 2 * nHiddenUnits[i]));
        assertEquals(nHiddenUnits[i], (int) forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0));
        // The layer's reported parameter count must equal the sum of its parts.
        int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
        assertEquals(nParams, layer.numParams());
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class ComputationGraphTestRNN, method testRnnTimeStepGravesLSTM.
@Test
public void testRnnTimeStepGravesLSTM() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;
    // 4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer; hence also tests the
    // RNN <-> feed-forward preprocessors.
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "in")
            .addLayer("1",
                    new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "0")
            .addLayer("2",
                    new DenseLayer.Builder().nIn(8).nOut(9)
                            .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "1")
            // Fix: the original chained .weightInit(WeightInit.DISTRIBUTION) twice on this
            // layer; the redundant call is removed (same value, last call wins).
            .addLayer("3",
                    new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(9).nOut(4)
                            .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new NormalDistribution(0, 0.5)).build(),
                    "2")
            .setOutputs("3")
            .inputPreProcessor("2", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("3", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true).build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });

    // Reference: activations from one full-length forward pass over the whole series.
    Map<String, INDArray> allOutputActivations = graph.feedForward(input, true);
    INDArray fullOutL0 = allOutputActivations.get("0");
    INDArray fullOutL1 = allOutputActivations.get("1");
    INDArray fullOutL3 = allOutputActivations.get("3");
    assertArrayEquals(new int[] { 3, 7, timeSeriesLength }, fullOutL0.shape());
    assertArrayEquals(new int[] { 3, 8, timeSeriesLength }, fullOutL1.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullOutL3.shape());

    // rnnTimeStep must match the full forward pass regardless of how the series is chunked.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        int nSteps = timeSeriesLength / inLength; // nSteps chunks, each of length inLength
        graph.rnnClearPreviousState();
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertEquals(inLength, inputSubset.size(2));

            INDArray[] outArr = graph.rnnTimeStep(inputSubset);
            assertEquals(1, outArr.length);
            INDArray out = outArr[0];

            // Expected output: the matching time slice of the full forward-pass activations.
            // The inLength==1 branch copies into a fresh 3d array so shapes compare exactly.
            INDArray expOutSubset;
            if (inLength == 1) {
                int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
                expOutSubset = Nd4j.create(sizes);
                expOutSubset.tensorAlongDimension(0, 1, 0).assign(fullOutL3.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset, out);

            // The stored RNN state after the step must equal the last-time-step activations.
            Map<String, INDArray> currL0State = graph.rnnGetPreviousState("0");
            Map<String, INDArray> currL1State = graph.rnnGetPreviousState("1");
            INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class MultiLayerTestRNN, method testRnnTimeStepGravesLSTM.
@Test
public void testRnnTimeStepGravesLSTM() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;
    // 4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer; hence also tests the
    // RNN <-> feed-forward preprocessors.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            .layer(2, new DenseLayer.Builder().nIn(8).nOut(9)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            // Fix: the original chained .weightInit(WeightInit.DISTRIBUTION) twice on this
            // layer; the redundant call is removed (same value, last call wins).
            .layer(3, new RnnOutputLayer.Builder(LossFunction.MCXENT).nIn(9).nOut(4)
                    .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build())
            .inputPreProcessor(2, new RnnToFeedForwardPreProcessor())
            .inputPreProcessor(3, new FeedForwardToRnnPreProcessor())
            .build();
    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    // BUG FIX: the original constructed the network but never initialized it; every sibling
    // test calls init() immediately after construction to allocate parameters.
    mln.init();

    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });

    // Reference: activations from one full-length forward pass over the whole series.
    // feedForward returns [input, L0 act, L1 act, L2 act, L3 act]; index accordingly.
    List<INDArray> allOutputActivations = mln.feedForward(input, true);
    INDArray fullOutL0 = allOutputActivations.get(1);
    INDArray fullOutL1 = allOutputActivations.get(2);
    INDArray fullOutL3 = allOutputActivations.get(4);

    // rnnTimeStep must match the full forward pass regardless of how the series is chunked.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        int nSteps = timeSeriesLength / inLength; // nSteps chunks, each of length inLength
        mln.rnnClearPreviousState();
        // Reset; should be set (again) by the rnnTimeStep method itself.
        mln.setInputMiniBatchSize(1);
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset;
            if (inLength == 1) {
                // Workaround to an nd4j bug: build the single-time-step 3d array explicitly.
                int[] sizes = new int[] { input.size(0), input.size(1), 1 };
                inputSubset = Nd4j.create(sizes);
                inputSubset.tensorAlongDimension(0, 1, 0).assign(input.get(NDArrayIndex.all(),
                        NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            if (inLength > 1)
                assertEquals(inLength, inputSubset.size(2));

            INDArray out = mln.rnnTimeStep(inputSubset);

            // Expected output: the matching time slice of the full forward-pass activations.
            INDArray expOutSubset;
            if (inLength == 1) {
                int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
                expOutSubset = Nd4j.create(sizes);
                expOutSubset.tensorAlongDimension(0, 1, 0).assign(fullOutL3.get(
                        NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset, out);

            // The stored RNN state after the step must equal the last-time-step activations.
            Map<String, INDArray> currL0State = mln.rnnGetPreviousState(0);
            Map<String, INDArray> currL1State = mln.rnnGetPreviousState(1);
            INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Usage example of org.deeplearning4j.nn.layers.recurrent.GravesLSTM in the deeplearning4j project (by deeplearning4j): class MultiLayerTestRNN, method testGravesLSTMInit.
@Test
public void testGravesLSTMInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .layer(1, new RnnOutputLayer.Builder(LossFunction.MSE)
                    .nIn(nHiddenUnits).nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    // Ensure that we have the correct number of weights and biases, each with the
    // expected shape.
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GravesLSTM);
    Map<String, INDArray> paramTable = layer.paramTable();
    // 2 sets of weights (input + recurrent), 1 set of biases
    assertEquals(3, paramTable.size());
    INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    // Recurrent weights: shape [layerSize, 4*layerSize+3]
    assertArrayEquals(new int[] { nHiddenUnits, 4 * nHiddenUnits + 3 }, recurrentWeights.shape());
    INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    // Input weights: shape [nIn, 4*layerSize]
    assertArrayEquals(new int[] { nIn, 4 * nHiddenUnits }, inputWeights.shape());
    INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
    // Biases: shape [1, 4*layerSize]
    assertArrayEquals(new int[] { 1, 4 * nHiddenUnits }, biases.shape());
    // Forget gate biases should be initialized to > 0; see the parameter initializer
    // for details.
    INDArray forgetGateBiases = biases.get(NDArrayIndex.point(0),
            NDArrayIndex.interval(nHiddenUnits, 2 * nHiddenUnits));
    assertEquals(nHiddenUnits, (int) forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0));
    // The layer's reported parameter count must equal the sum of its parts.
    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertEquals(nParams, layer.numParams());
}
Aggregations