Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
Class GradientCheckTestsMasking, method testBidirectionalLSTMMasking.
public void testBidirectionalLSTMMasking() {
//Basic test of GravesLSTM layer
int timeSeriesLength = 5;
int nIn = 5;
int layerSize = 4;
int nOut = 3;
int miniBatchSize = 3;
INDArray[] masks = new INDArray[] { null, Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 } }), Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 0 }, { 1, 1, 1, 0, 0 } }), Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 1, 1, 1 }, { 0, 0, 1, 1, 1 } }) };
int testNum = 0;
for (INDArray mask : masks) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)).seed(12345L).list().layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build()).layer(1, new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize).activation(Activation.TANH).build()).layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
labels.putScalar(i, r.nextInt(nOut), j, 1.0);
mln.setLayerMaskArrays(mask, mask);
System.out.println("testBidirectionalLSTMMasking() - testNum = " + testNum++);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
Class GradientCheckTestsMasking, method testPerOutputMaskingRnn.
public void testPerOutputMaskingRnn() {
//For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard
// 2d masks (used for per *example* masking)
int nIn = 4;
int layerSize = 4;
int nOut = 4;
//1 example, TS length 3
INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 }, new int[] { 1, nOut, 3 }, 'f');
//1 example, TS length 1
INDArray mask2 = Nd4j.create(new double[] { 1, 1, 0, 1 }, new int[] { 1, nOut, 1 }, 'f');
//3 examples, TS length 3
INDArray mask3 = Nd4j.create(new double[] { // step) followed by time index (least frequently)
1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 }, new int[] { 3, nOut, 3 }, 'f');
INDArray[] labelMasks = new INDArray[] { mask1, mask2, mask3 };
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), // new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), // new LossMCXENT(), //Per output masking on MCXENT+Softmax: not yet supported
new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
Activation[] act = new Activation[] { //XENT
Activation.SIGMOID, //Hinge
Activation.TANH, //KLD
Activation.SIGMOID, //KLD + softmax
Activation.SOFTMAX, //L1
Activation.TANH, //L2
Activation.TANH, //MAE
Activation.TANH, //MAE + softmax
Activation.SOFTMAX, //MAPE
Activation.TANH, //MAPE + softmax
Activation.SOFTMAX, //MCXENT + sigmoid
Activation.SIGMOID, //MSE
Activation.TANH, //MSE + softmax
Activation.SOFTMAX, //MSLE - needs positive labels/activations (due to log)
Activation.SIGMOID, //MSLE + softmax
Activation.SOFTMAX, //NLL
Activation.SIGMOID, //Poisson
Activation.SIGMOID, //Squared hinge
Activation.TANH };
for (INDArray labelMask : labelMasks) {
int minibatch = labelMask.size(0);
int tsLength = labelMask.size(2);
for (int i = 0; i < lossFunctions.length; i++) {
ILossFunction lf = lossFunctions[i];
Activation a = act[i];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345).list().layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build()).layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.setLayerMaskArrays(null, labelMask);
INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, new int[] { minibatch, nIn, tsLength }, new int[] { minibatch, nOut, tsLength }, 12345);
INDArray features = fl[0];
INDArray labels = fl[1];
String msg = "testPerOutputMaskingRnn(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a;
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
assertTrue(msg, gradOK);
//Check the equivalent compgraph:
ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).seed(12345).graphBuilder().addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in").addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0").setOutputs("1").build();
ComputationGraph graph = new ComputationGraph(cg);
net.setLayerMaskArrays(null, labelMask);
gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { features }, new INDArray[] { labels });
assertTrue(msg + " (compgraph)", gradOK);
Class TestVariableLengthTSCG, method testOutputMaskingScoreMagnitudes.
public void testOutputMaskingScoreMagnitudes() {
//Idea: check magnitude of scores, with differing number of values masked out
//i.e., MSE with zero weight init and 1.0 labels: know what to expect in terms of score
int nIn = 3;
int[] timeSeriesLengths = { 3, 10 };
int[] outputSizes = { 1, 2, 5 };
int[] miniBatchSizes = { 1, 4 };
Random r = new Random(12345);
for (int tsLength : timeSeriesLengths) {
for (int nOut : outputSizes) {
for (int miniBatch : miniBatchSizes) {
for (int nToMask = 0; nToMask < tsLength - 1; nToMask++) {
String msg = "tsLen=" + tsLength + ", nOut=" + nOut + ", miniBatch=" + miniBatch;
INDArray labelMaskArray = Nd4j.ones(miniBatch, tsLength);
for (int i = 0; i < miniBatch; i++) {
//For each example: select which outputs to mask...
int nMasked = 0;
while (nMasked < nToMask) {
int tryIdx = r.nextInt(tsLength);
if (labelMaskArray.getDouble(i, tryIdx) == 0.0)
labelMaskArray.putScalar(new int[] { i, tryIdx }, 0.0);
INDArray input = Nd4j.rand(new int[] { miniBatch, nIn, tsLength });
INDArray labels = Nd4j.ones(miniBatch, nOut, tsLength);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).graphBuilder().addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build(), "in").addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(5).nOut(nOut).weightInit(WeightInit.ZERO).updater(Updater.NONE).build(), "0").setOutputs("1").pretrain(false).backprop(true).build();
ComputationGraph net = new ComputationGraph(conf);
//MSE loss function: 1/n * sum(squaredErrors)... but sum(squaredErrors) = n * (1-0) here -> sum(squaredErrors)
//Sum over minibatches, then divide by minibatch size
double expScore = tsLength - nToMask;
net.setLayerMaskArrays(null, new INDArray[] { labelMaskArray });
net.setInput(0, input);
net.setLabel(0, labels);
double score = net.score();
assertEquals(msg, expScore, score, 0.1);
Class ComputationGraphTestRNN, method testRnnTimeStepMultipleInOut.
public void testRnnTimeStepMultipleInOut() {
//Test rnnTimeStep functionality with multiple inputs and outputs...
int timeSeriesLength = 12;
//4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors.
//Network architecture: lstm0 -> Dense -> RnnOutputLayer0
// and lstm1 -> Dense -> RnnOutputLayer1
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder().addInputs("in0", "in1").addLayer("lstm0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(6).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "in0").addLayer("lstm1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(4).nOut(5).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "in1").addLayer("dense", new DenseLayer.Builder().nIn(6 + 5).nOut(9).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "lstm0", "lstm1").addLayer("out0", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.DISTRIBUTION).nIn(9).nOut(3).activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "dense").addLayer("out1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.DISTRIBUTION).nIn(9).nOut(4).activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "dense").setOutputs("out0", "out1").inputPreProcessor("dense", new RnnToFeedForwardPreProcessor()).inputPreProcessor("out0", new FeedForwardToRnnPreProcessor()).inputPreProcessor("out1", new FeedForwardToRnnPreProcessor()).pretrain(false).backprop(true).build();
ComputationGraph graph = new ComputationGraph(conf);
INDArray input0 = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
INDArray input1 = Nd4j.rand(new int[] { 3, 4, timeSeriesLength });
Map<String, INDArray> allOutputActivations = graph.feedForward(new INDArray[] { input0, input1 }, true);
INDArray fullActLSTM0 = allOutputActivations.get("lstm0");
INDArray fullActLSTM1 = allOutputActivations.get("lstm1");
INDArray fullActOut0 = allOutputActivations.get("out0");
INDArray fullActOut1 = allOutputActivations.get("out1");
assertArrayEquals(new int[] { 3, 6, timeSeriesLength }, fullActLSTM0.shape());
assertArrayEquals(new int[] { 3, 5, timeSeriesLength }, fullActLSTM1.shape());
assertArrayEquals(new int[] { 3, 3, timeSeriesLength }, fullActOut0.shape());
assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullActOut1.shape());
int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
//Should get the same result regardless of step size; should be identical to standard forward pass
for (int i = 0; i < inputLengths.length; i++) {
int inLength = inputLengths[i];
//each of length inLength
int nSteps = timeSeriesLength / inLength;
for (int j = 0; j < nSteps; j++) {
int startTimeRange = j * inLength;
int endTimeRange = startTimeRange + inLength;
INDArray inputSubset0 = input0.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
if (inLength > 1)
assertTrue(inputSubset0.size(2) == inLength);
INDArray inputSubset1 = input1.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
if (inLength > 1)
assertTrue(inputSubset1.size(2) == inLength);
INDArray[] outArr = graph.rnnTimeStep(inputSubset0, inputSubset1);
assertEquals(2, outArr.length);
INDArray out0 = outArr[0];
INDArray out1 = outArr[1];
INDArray expOutSubset0;
if (inLength == 1) {
int[] sizes = new int[] { fullActOut0.size(0), fullActOut0.size(1), 1 };
expOutSubset0 = Nd4j.create(sizes);
expOutSubset0.tensorAlongDimension(0, 1, 0).assign(fullActOut0.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
} else {
expOutSubset0 = fullActOut0.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
INDArray expOutSubset1;
if (inLength == 1) {
int[] sizes = new int[] { fullActOut1.size(0), fullActOut1.size(1), 1 };
expOutSubset1 = Nd4j.create(sizes);
expOutSubset1.tensorAlongDimension(0, 1, 0).assign(fullActOut1.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
} else {
expOutSubset1 = fullActOut1.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
assertEquals(expOutSubset0, out0);
assertEquals(expOutSubset1, out1);
Map<String, INDArray> currLSTM0State = graph.rnnGetPreviousState("lstm0");
Map<String, INDArray> currLSTM1State = graph.rnnGetPreviousState("lstm1");
INDArray lastActL0 = currLSTM0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
INDArray lastActL1 = currLSTM1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
INDArray expLastActL0 = fullActLSTM0.tensorAlongDimension(endTimeRange - 1, 1, 0);
INDArray expLastActL1 = fullActLSTM1.tensorAlongDimension(endTimeRange - 1, 1, 0);
assertEquals(expLastActL0, lastActL0);
assertEquals(expLastActL1, lastActL1);
Class ComputationGraphTestRNN, method testRnnTimeStep2dInput.
public void testRnnTimeStep2dInput() {
int timeSeriesLength = 6;
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in").addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "in").addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "0").addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.DISTRIBUTION).nIn(8).nOut(4).activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "1").setOutputs("2").build();
ComputationGraph graph = new ComputationGraph(conf);
INDArray input3d = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
INDArray out3d = graph.rnnTimeStep(input3d)[0];
assertArrayEquals(out3d.shape(), new int[] { 3, 4, timeSeriesLength });
for (int i = 0; i < timeSeriesLength; i++) {
INDArray input2d = input3d.tensorAlongDimension(i, 1, 0);
INDArray out2d = graph.rnnTimeStep(input2d)[0];
assertArrayEquals(out2d.shape(), new int[] { 3, 4 });
INDArray expOut2d = out3d.tensorAlongDimension(i, 1, 0);
assertEquals(out2d, expOut2d);
//Check same but for input of size [3,5,1]. Expect [3,4,1] out
for (int i = 0; i < timeSeriesLength; i++) {
INDArray temp = Nd4j.create(new int[] { 3, 5, 1 });
temp.tensorAlongDimension(0, 1, 0).assign(input3d.tensorAlongDimension(i, 1, 0));
INDArray out3dSlice = graph.rnnTimeStep(temp)[0];
assertArrayEquals(out3dSlice.shape(), new int[] { 3, 4, 1 });
assertTrue(out3dSlice.tensorAlongDimension(0, 1, 0).equals(out3d.tensorAlongDimension(i, 1, 0)));