Example use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in the deeplearning4j project (by deeplearning4j).
From the class ComputationGraphTestRNN, method testRnnTimeStepGravesLSTM.
/**
 * Verifies that {@code ComputationGraph.rnnTimeStep} produces the same activations as a single
 * full {@code feedForward} pass, however the 12-step input series is split into chunks.
 * <p>
 * For each chunk length in {1, 2, 3, 4, 6, 12} the stored RNN state is cleared, the input is fed
 * chunk by chunk, and after every chunk both the graph output and the stored previous-activation
 * state of the two GravesLSTM layers are compared against the full forward pass.
 */
@Test
public void testRnnTimeStepGravesLSTM() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;

    //4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors.
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(5).nOut(7)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7).nOut(8)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "0")
            .addLayer("2", new DenseLayer.Builder()
                    .nIn(8).nOut(9)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "1")
            .addLayer("3", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .nIn(9).nOut(4)
                    .activation(Activation.SOFTMAX)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "2")
            .setOutputs("3")
            .inputPreProcessor("2", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("3", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    // Reference result: one forward pass over the complete series.
    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
    Map<String, INDArray> allOutputActivations = graph.feedForward(input, true);
    INDArray fullOutL0 = allOutputActivations.get("0");
    INDArray fullOutL1 = allOutputActivations.get("1");
    INDArray fullOutL3 = allOutputActivations.get("3");
    assertArrayEquals(new int[] { 3, 7, timeSeriesLength }, fullOutL0.shape());
    assertArrayEquals(new int[] { 3, 8, timeSeriesLength }, fullOutL1.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullOutL3.shape());

    // Every chunk length divides timeSeriesLength evenly.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };

    //Should get the same result regardless of step size; should be identical to standard forward pass
    for (int inLength : inputLengths) {
        //nSteps chunks, each of length inLength
        int nSteps = timeSeriesLength / inLength;
        graph.rnnClearPreviousState();

        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;

            INDArray inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            // The time-dimension size is only checked for multi-step chunks, as in the original test.
            if (inLength > 1) {
                assertEquals(inLength, inputSubset.size(2));
            }

            INDArray[] outArr = graph.rnnTimeStep(inputSubset);
            assertEquals(1, outArr.length);
            INDArray out = outArr[0];

            INDArray expOutSubset;
            if (inLength == 1) {
                // Build an explicit [miniBatch, nOut, 1] array and copy the single time step into it.
                int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
                expOutSubset = Nd4j.create(sizes);
                expOutSubset.tensorAlongDimension(0, 1, 0).assign(fullOutL3.get(NDArrayIndex.all(),
                        NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset, out);

            // Stored LSTM activations should equal the full-pass activations at the chunk's last step.
            Map<String, INDArray> currL0State = graph.rnnGetPreviousState("0");
            Map<String, INDArray> currL1State = graph.rnnGetPreviousState("1");
            INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Example use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in the deeplearning4j project (by deeplearning4j).
From the class CenterLossOutputLayerTest, method getGraph.
/**
 * Builds and initialises a two-layer computation graph: a dense ReLU layer (4 in, 5 out)
 * feeding a softmax centre-loss output layer.
 *
 * @param numLabels number of outputs of the centre-loss output layer
 * @param lambda    value passed to the output layer builder's {@code lambda(...)} setting
 * @return the initialised graph
 */
private ComputationGraph getGraph(int numLabels, double lambda) {
    // Fix the global RNG so repeated calls start from the same state.
    Nd4j.getRandom().setSeed(12345);

    ComputationGraphConfiguration graphConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new NormalDistribution(0, 1))
            .updater(Updater.NONE)
            .learningRate(1.0)
            .graphBuilder()
            .addInputs("input1")
            .addLayer("l1",
                    new DenseLayer.Builder()
                            .nIn(4)
                            .nOut(5)
                            .activation(Activation.RELU)
                            .build(),
                    "input1")
            .addLayer("lossLayer",
                    new CenterLossOutputLayer.Builder()
                            .lossFunction(LossFunctions.LossFunction.MCXENT)
                            .nIn(5)
                            .nOut(numLabels)
                            .lambda(lambda)
                            .activation(Activation.SOFTMAX)
                            .build(),
                    "l1")
            .setOutputs("lossLayer")
            .pretrain(false)
            .backprop(true)
            .build();

    ComputationGraph net = new ComputationGraph(graphConf);
    net.init();
    return net;
}
Example use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in the deeplearning4j project (by deeplearning4j).
From the class TestVariableLengthTSCG, method testOutputMasking.
/**
 * Verifies that masked label time steps produce outputs of exactly zero.
 * <p>
 * For each combination of series length, output size, mini-batch size and number of masked steps,
 * a random label mask is generated and applied to two graphs (GravesLSTM followed by an
 * RnnOutputLayer): one with MSE loss and identity activation, one with MCXENT loss and softmax.
 * Every output value at a masked (example, time step) position must be 0.0 for both graphs.
 */
@Test
public void testOutputMasking() {
    //If labels are masked: want zero outputs for that time step.
    int nIn = 3;
    int[] timeSeriesLengths = { 3, 10 };
    int[] outputSizes = { 1, 2, 5 };
    int[] miniBatchSizes = { 1, 4 };
    Random r = new Random(12345);

    for (int tsLength : timeSeriesLengths) {
        for (int nOut : outputSizes) {
            for (int miniBatch : miniBatchSizes) {
                for (int nToMask = 0; nToMask < tsLength - 1; nToMask++) {
                    INDArray labelMaskArray = Nd4j.ones(miniBatch, tsLength);
                    for (int i = 0; i < miniBatch; i++) {
                        //For each example: select which outputs to mask...
                        int nMasked = 0;
                        while (nMasked < nToMask) {
                            int tryIdx = r.nextInt(tsLength);
                            // Already masked: draw another index.
                            if (labelMaskArray.getDouble(i, tryIdx) == 0.0) {
                                continue;
                            }
                            labelMaskArray.putScalar(new int[] { i, tryIdx }, 0.0);
                            nMasked++;
                        }
                    }

                    INDArray input = Nd4j.rand(new int[] { miniBatch, nIn, tsLength });

                    // Graph 1: MSE loss with identity activation on the output layer.
                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                            .regularization(false).seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 1))
                                    .updater(Updater.NONE).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE)
                                    .activation(Activation.IDENTITY).nIn(5).nOut(nOut)
                                    .weightInit(WeightInit.XAVIER)
                                    .updater(Updater.NONE).build(), "0")
                            .setOutputs("1")
                            .pretrain(false).backprop(true)
                            .build();
                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();

                    // Graph 2: MCXENT loss with softmax activation on the output layer.
                    ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                            .regularization(false).seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 1))
                                    .updater(Updater.NONE).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(5).nOut(nOut)
                                    .weightInit(WeightInit.XAVIER)
                                    .updater(Updater.NONE).build(), "0")
                            .setOutputs("1")
                            .pretrain(false).backprop(true)
                            .build();
                    ComputationGraph net2 = new ComputationGraph(conf2);
                    net2.init();

                    // No feature mask (null); only the label mask is applied.
                    net.setLayerMaskArrays(null, new INDArray[] { labelMaskArray });
                    net2.setLayerMaskArrays(null, new INDArray[] { labelMaskArray });

                    INDArray out = net.output(input)[0];
                    INDArray out2 = net2.output(input)[0];

                    for (int i = 0; i < miniBatch; i++) {
                        for (int j = 0; j < tsLength; j++) {
                            double m = labelMaskArray.getDouble(i, j);
                            if (m == 0.0) {
                                //Expect outputs to be exactly 0.0
                                INDArray outRow = out.get(NDArrayIndex.point(i), NDArrayIndex.all(),
                                        NDArrayIndex.point(j));
                                INDArray outRow2 = out2.get(NDArrayIndex.point(i), NDArrayIndex.all(),
                                        NDArrayIndex.point(j));
                                for (int k = 0; k < nOut; k++) {
                                    // JUnit argument order: expected, actual, delta.
                                    assertEquals(0.0, outRow.getDouble(k), 0.0);
                                    assertEquals(0.0, outRow2.getDouble(k), 0.0);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
Example use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in the deeplearning4j project (by deeplearning4j).
From the class MultiLayerTestRNN, method testRnnStateMethods.
/**
 * Exercises the RNN state accessors of {@code MultiLayerNetwork}:
 * {@code rnnGetPreviousState}, {@code rnnClearPreviousState} and {@code rnnSetPreviousState}.
 * <p>
 * After one {@code rnnTimeStep} over the full series, each GravesLSTM layer is expected to hold
 * two state entries (previous activation and previous memory cell), and the stored activation
 * must equal the last time step of the matching {@code feedForward} activations.
 */
@Test
public void testRnnStateMethods() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 6;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(5).nOut(7)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7).nOut(8)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build())
            .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .nIn(8).nOut(4)
                    .activation(Activation.SOFTMAX)
                    .dist(new NormalDistribution(0, 0.5))
                    .build())
            .build();
    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    // Initialise explicitly, as the sibling RNN test does, rather than relying on
    // whatever implicit initialisation the first forward pass may perform.
    mln.init();

    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });

    // Index 0 of the activation list is read below as the input, so layer i's output is at i + 1.
    List<INDArray> allOutputActivations = mln.feedForward(input, true);
    INDArray outAct = allOutputActivations.get(3);

    INDArray outRnnTimeStep = mln.rnnTimeStep(input);

    //Should be identical here
    assertEquals(outAct, outRnnTimeStep);

    Map<String, INDArray> currStateL0 = mln.rnnGetPreviousState(0);
    Map<String, INDArray> currStateL1 = mln.rnnGetPreviousState(1);
    assertEquals(2, currStateL0.size());
    assertEquals(2, currStateL1.size());

    INDArray lastActL0 = currStateL0.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    INDArray lastMemL0 = currStateL0.get(GravesLSTM.STATE_KEY_PREV_MEMCELL);
    assertTrue(lastActL0 != null && lastMemL0 != null);

    INDArray lastActL1 = currStateL1.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    INDArray lastMemL1 = currStateL1.get(GravesLSTM.STATE_KEY_PREV_MEMCELL);
    assertTrue(lastActL1 != null && lastMemL1 != null);

    // Stored activations must match the final time step of the full forward pass.
    INDArray expectedLastActL0 = allOutputActivations.get(1).tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    assertEquals(expectedLastActL0, lastActL0);

    INDArray expectedLastActL1 = allOutputActivations.get(2).tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    assertEquals(expectedLastActL1, lastActL1);

    //Check clearing and setting of state:
    mln.rnnClearPreviousState();
    assertTrue(mln.rnnGetPreviousState(0).isEmpty());
    assertTrue(mln.rnnGetPreviousState(1).isEmpty());

    mln.rnnSetPreviousState(0, currStateL0);
    assertEquals(2, mln.rnnGetPreviousState(0).size());
    mln.rnnSetPreviousState(1, currStateL1);
    assertEquals(2, mln.rnnGetPreviousState(1).size());
}
Example use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in the deeplearning4j project (by deeplearning4j).
From the class MultiLayerTestRNN, method testRnnTimeStep2dInput.
/**
 * Verifies that {@code MultiLayerNetwork.rnnTimeStep} accepts single-time-step input both as a
 * 2d array of shape [3, 5] and as a 3d array of shape [3, 5, 1], and that stepping through the
 * series one step at a time reproduces the output of one call on the full [3, 5, 6] input.
 */
@Test
public void testRnnTimeStep2dInput() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 6;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(5).nOut(7)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7).nOut(8)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build())
            .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .nIn(8).nOut(4)
                    .activation(Activation.SOFTMAX)
                    .dist(new NormalDistribution(0, 0.5))
                    .build())
            .build();
    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    mln.init();

    // Reference: the whole series in one call.
    INDArray input3d = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
    INDArray out3d = mln.rnnTimeStep(input3d);
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, out3d.shape());

    // Same series, one 2d [3,5] slice per call. Expect [3,4] out each time.
    mln.rnnClearPreviousState();
    for (int i = 0; i < timeSeriesLength; i++) {
        INDArray input2d = input3d.tensorAlongDimension(i, 1, 0);
        INDArray out2d = mln.rnnTimeStep(input2d);

        assertArrayEquals(new int[] { 3, 4 }, out2d.shape());

        INDArray expOut2d = out3d.tensorAlongDimension(i, 1, 0);
        assertEquals(expOut2d, out2d);
    }

    //Check same but for input of size [3,5,1]. Expect [3,4,1] out
    mln.rnnClearPreviousState();
    for (int i = 0; i < timeSeriesLength; i++) {
        INDArray temp = Nd4j.create(new int[] { 3, 5, 1 });
        temp.tensorAlongDimension(0, 1, 0).assign(input3d.tensorAlongDimension(i, 1, 0));
        INDArray out3dSlice = mln.rnnTimeStep(temp);
        assertArrayEquals(new int[] { 3, 4, 1 }, out3dSlice.shape());

        assertEquals(out3d.tensorAlongDimension(i, 1, 0), out3dSlice.tensorAlongDimension(0, 1, 0));
    }
}
Aggregations