Search in sources :

Example 1 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class OutputLayerTest method testRnnOutputLayerIncEdgeCases.

/**
 * Compares a 2d {@code OutputLayer} (placed behind an {@code RnnToFeedForwardPreProcessor}) with an
 * {@code RnnOutputLayer} on the same time-series input, for four size combinations:
 * the basic case plus timeSeriesLength == 1, miniBatchSize == 1, and both at once.
 *
 * <p>For each combination the test checks that
 * <ul>
 *   <li>neither score is NaN and the two scores agree (relative error below 1e-6) once the 2d
 *       score is multiplied by the time series length;</li>
 *   <li>the input/labels held by the output layers have the expected shapes;</li>
 *   <li>{@code feedForward}, {@code output}, {@code activate} and {@code preOutput} return 2d
 *       arrays for the {@code OutputLayer} network and 3d arrays for the {@code RnnOutputLayer}
 *       network.</li>
 * </ul>
 *
 * <p>All {@code assertArrayEquals} calls use JUnit's (expected, actual) argument order so that a
 * failure message reports the two shapes the right way round.
 */
@Test
public void testRnnOutputLayerIncEdgeCases() {
    //Basic test + test edge cases: timeSeriesLength==1, miniBatchSize==1, both
    //tsLength[t] and miniBatch[t] are read together as one test case
    int[] tsLength = { 5, 1, 5, 1 };
    int[] miniBatch = { 7, 7, 1, 1 };
    int nIn = 3;
    int nOut = 6;
    int layerSize = 4;
    FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
    for (int t = 0; t < tsLength.length; t++) {
        Nd4j.getRandom().setSeed(12345);
        int timeSeriesLength = tsLength[t];
        int miniBatchSize = miniBatch[t];

        //Random input in [-0.5, 0.5), shape [miniBatchSize, nIn, timeSeriesLength]
        Random r = new Random(12345L);
        INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < nIn; j++) {
                for (int k = 0; k < timeSeriesLength; k++) {
                    input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
                }
            }
        }

        //One-hot labels: exactly one class set to 1.0 per (example, time step)
        INDArray labels3d = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                int idx = r.nextInt(nOut);
                labels3d.putScalar(new int[] { i, idx, j }, 1.0f);
            }
        }
        //2d version of the same labels, for the OutputLayer network
        INDArray labels2d = proc.backprop(labels3d, miniBatchSize);

        //Network 1: GravesLSTM -> (RnnToFeedForwardPreProcessor) -> OutputLayer
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345L)
                .list()
                .layer(0, new GravesLSTM.Builder()
                        .nIn(nIn).nOut(layerSize)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .activation(Activation.TANH)
                        .updater(Updater.NONE)
                        .build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX)
                        .nIn(layerSize).nOut(nOut)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .updater(Updater.NONE)
                        .build())
                .inputPreProcessor(1, new RnnToFeedForwardPreProcessor())
                .pretrain(false).backprop(true)
                .build();
        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();
        //NOTE(review): no explicit setInput(...) precedes computeGradientAndScore() below, so this
        //feedForward(input) call presumably also leaves the input set on the network - confirm
        //before removing or reordering it.
        INDArray out2d = mln.feedForward(input).get(2);
        //Result is not inspected; the call is kept as in the original test.
        INDArray out3d = proc.preProcess(out2d, miniBatchSize);

        //Network 2: same seed and layer sizes, but with an RnnOutputLayer and no preprocessor
        MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder()
                .seed(12345L)
                .list()
                .layer(0, new GravesLSTM.Builder()
                        .nIn(nIn).nOut(layerSize)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .activation(Activation.TANH)
                        .updater(Updater.NONE)
                        .build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX)
                        .nIn(layerSize).nOut(nOut)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                        .updater(Updater.NONE)
                        .build())
                .pretrain(false).backprop(true)
                .build();
        MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
        mlnRnn.init();
        //NOTE(review): as above - result unused, but the call presumably sets the network input.
        INDArray outRnn = mlnRnn.feedForward(input).get(2);

        mln.setLabels(labels2d);
        mlnRnn.setLabels(labels3d);
        mln.computeGradientAndScore();
        mlnRnn.computeGradientAndScore();

        //score is average over all examples.
        //However: OutputLayer version has miniBatch*timeSeriesLength "examples" (after reshaping)
        //RnnOutputLayer has miniBatch examples
        //Hence: expect difference in scores by factor of timeSeriesLength
        double score = mln.score() * timeSeriesLength;
        double scoreRNN = mlnRnn.score();
        assertTrue(!Double.isNaN(score));
        assertTrue(!Double.isNaN(scoreRNN));
        double relError = Math.abs(score - scoreRNN) / (Math.abs(score) + Math.abs(scoreRNN));
        System.out.println(relError);
        assertTrue(relError < 1e-6);

        //Check labels and inputs for output layer:
        OutputLayer ol = (OutputLayer) mln.getOutputLayer();
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, layerSize }, ol.getInput().shape());
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, ol.getLabels().shape());
        RnnOutputLayer rnnol = (RnnOutputLayer) mlnRnn.getOutputLayer();
        //assertArrayEquals(rnnol.getInput().shape(),new int[]{miniBatchSize,layerSize,timeSeriesLength});
        //Input may be set by BaseLayer methods. Thus input may end up as reshaped 2d version instead of original 3d version.
        //Not ideal, but everything else works.
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, rnnol.getLabels().shape());

        //Check shapes of output for both:
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, out2d.shape());
        INDArray out = mln.output(input);
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, out.shape());
        INDArray act = mln.activate();
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, act.shape());
        INDArray preout = mln.preOutput(input);
        assertArrayEquals(new int[] { miniBatchSize * timeSeriesLength, nOut }, preout.shape());

        INDArray outFFRnn = mlnRnn.feedForward(input).get(2);
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, outFFRnn.shape());
        INDArray outRnn2 = mlnRnn.output(input);
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, outRnn2.shape());
        INDArray actRnn = mlnRnn.activate();
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, actRnn.shape());
        INDArray preoutRnn = mlnRnn.preOutput(input);
        assertArrayEquals(new int[] { miniBatchSize, nOut, timeSeriesLength }, preoutRnn.shape());
    }
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer) RnnOutputLayer(org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) GravesLSTM(org.deeplearning4j.nn.conf.layers.GravesLSTM) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Example 2 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class EmbeddingLayerTest method testEmbeddingLayerRNN.

/**
 * Builds two three-layer networks that differ only in their first layer - an {@code EmbeddingLayer}
 * fed with class indices versus a {@code DenseLayer} fed with the matching one-hot vectors - gives
 * both the same parameters, and checks that score and every per-variable gradient are equal.
 */
@Test
public void testEmbeddingLayerRNN() {
    int nClassesIn = 10;

    //Network under test: EmbeddingLayer -> GravesLSTM -> RnnOutputLayer
    MultiLayerConfiguration embeddingConf = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .list()
            .layer(0, new EmbeddingLayer.Builder().nIn(nClassesIn).nOut(5).build())
            .layer(1, new GravesLSTM.Builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build())
            .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(7).nOut(4)
                    .activation(Activation.SOFTMAX)
                    .build())
            .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
            .inputPreProcessor(1, new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true)
            .build();

    //Reference network: DenseLayer -> GravesLSTM -> RnnOutputLayer
    MultiLayerConfiguration denseConf = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).build())
            .layer(1, new GravesLSTM.Builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build())
            .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(7).nOut(4)
                    .activation(Activation.SOFTMAX)
                    .build())
            .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
            .inputPreProcessor(1, new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true)
            .build();

    MultiLayerNetwork embeddingNet = new MultiLayerNetwork(embeddingConf);
    MultiLayerNetwork denseNet = new MultiLayerNetwork(denseConf);
    embeddingNet.init();
    denseNet.init();
    //Copy the embedding network's parameters into the dense network
    denseNet.setParams(embeddingNet.params().dup());

    int batchSize = 3;
    int timeSeriesLength = 8;
    INDArray inEmbedding = Nd4j.create(batchSize, 1, timeSeriesLength);
    INDArray inOneHot = Nd4j.create(batchSize, nClassesIn, timeSeriesLength);
    INDArray outLabels = Nd4j.create(batchSize, 4, timeSeriesLength);

    //Same random class per (example, step): stored as an index for the embedding input and as a
    //one-hot entry for the dense input; labels are one-hot over 4 classes
    Random rng = new Random(12345);
    for (int example = 0; example < batchSize; example++) {
        for (int step = 0; step < timeSeriesLength; step++) {
            int classIdx = rng.nextInt(nClassesIn);
            inEmbedding.putScalar(new int[] { example, 0, step }, classIdx);
            inOneHot.putScalar(new int[] { example, classIdx, step }, 1.0);

            int labelIdx = rng.nextInt(4);
            outLabels.putScalar(new int[] { example, labelIdx, step }, 1.0);
        }
    }

    embeddingNet.setInput(inEmbedding);
    denseNet.setInput(inOneHot);
    embeddingNet.setLabels(outLabels);
    denseNet.setLabels(outLabels);
    embeddingNet.computeGradientAndScore();
    denseNet.computeGradientAndScore();

    System.out.println(embeddingNet.score() + "\t" + denseNet.score());
    assertEquals(denseNet.score(), embeddingNet.score(), 1e-6);

    Map<String, INDArray> embeddingGrads = embeddingNet.gradient().gradientForVariable();
    Map<String, INDArray> denseGrads = denseNet.gradient().gradientForVariable();
    assertEquals(embeddingGrads.size(), denseGrads.size());
    for (Map.Entry<String, INDArray> entry : embeddingGrads.entrySet()) {
        assertEquals(denseGrads.get(entry.getKey()), entry.getValue());
    }
}
Also used : NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Random(java.util.Random) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) EmbeddingLayer(org.deeplearning4j.nn.conf.layers.EmbeddingLayer) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Example 3 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class TestComputationGraphNetwork method testPreprocessorAddition.

/**
 * Builds five computation-graph configurations with {@code setInputTypes(...)} and checks which
 * input preprocessor ends up on each layer vertex, along with the nIn values reported by the
 * layer configurations (most layers are built without an explicit nIn).
 */
@Test
public void testPreprocessorAddition() {
    //Also check that nIns are set automatically

    //First: check FF -> RNN
    {
        ComputationGraphConfiguration ffToRnn = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .setInputTypes(InputType.feedForward(5))
                .addLayer("rnn", new GravesLSTM.Builder().nOut(5).build(), "in")
                .addLayer("out", new RnnOutputLayer.Builder().nOut(5).build(), "rnn")
                .setOutputs("out")
                .build();

        LayerVertex rnn = (LayerVertex) ffToRnn.getVertices().get("rnn");
        LayerVertex out = (LayerVertex) ffToRnn.getVertices().get("out");
        assertEquals(5, ((FeedForwardLayer) rnn.getLayerConf().getLayer()).getNIn());
        assertEquals(5, ((FeedForwardLayer) out.getLayerConf().getLayer()).getNIn());
        assertTrue(rnn.getPreProcessor() instanceof FeedForwardToRnnPreProcessor);
        assertNull(out.getPreProcessor());
    }

    //Check RNN -> FF -> RNN
    {
        ComputationGraphConfiguration rnnFfRnn = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .setInputTypes(InputType.recurrent(5))
                .addLayer("ff", new DenseLayer.Builder().nOut(5).build(), "in")
                .addLayer("out", new RnnOutputLayer.Builder().nOut(5).build(), "ff")
                .setOutputs("out")
                .build();

        LayerVertex ff = (LayerVertex) rnnFfRnn.getVertices().get("ff");
        LayerVertex out = (LayerVertex) rnnFfRnn.getVertices().get("out");
        assertEquals(5, ((FeedForwardLayer) ff.getLayerConf().getLayer()).getNIn());
        assertEquals(5, ((FeedForwardLayer) out.getLayerConf().getLayer()).getNIn());
        assertTrue(ff.getPreProcessor() instanceof RnnToFeedForwardPreProcessor);
        assertTrue(out.getPreProcessor() instanceof FeedForwardToRnnPreProcessor);
    }

    //CNN -> Dense
    {
        ComputationGraphConfiguration cnnToDense = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .setInputTypes(InputType.convolutional(28, 28, 1))
                //(28-2+0)/2+1 = 14
                .addLayer("cnn", new ConvolutionLayer.Builder()
                        .kernelSize(2, 2).padding(0, 0).stride(2, 2).nOut(3)
                        .build(), "in")
                //(14-2+0)/2+1=7
                .addLayer("pool", new SubsamplingLayer.Builder()
                        .kernelSize(2, 2).padding(0, 0).stride(2, 2)
                        .build(), "cnn")
                .addLayer("dense", new DenseLayer.Builder().nOut(10).build(), "pool")
                .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).build(), "dense")
                .setOutputs("out")
                .build();

        //Check preprocessors:
        LayerVertex cnn = (LayerVertex) cnnToDense.getVertices().get("cnn");
        //Shouldn't be adding preprocessor here
        assertNull(cnn.getPreProcessor());
        LayerVertex pool = (LayerVertex) cnnToDense.getVertices().get("pool");
        assertNull(pool.getPreProcessor());
        LayerVertex dense = (LayerVertex) cnnToDense.getVertices().get("dense");
        assertTrue(dense.getPreProcessor() instanceof CnnToFeedForwardPreProcessor);
        CnnToFeedForwardPreProcessor cnnToFf = (CnnToFeedForwardPreProcessor) dense.getPreProcessor();
        assertEquals(3, cnnToFf.getNumChannels());
        assertEquals(7, cnnToFf.getInputHeight());
        assertEquals(7, cnnToFf.getInputWidth());
        LayerVertex out = (LayerVertex) cnnToDense.getVertices().get("out");
        assertNull(out.getPreProcessor());

        //Check nIns:
        assertEquals(7 * 7 * 3, ((FeedForwardLayer) dense.getLayerConf().getLayer()).getNIn());
    }

    //CNN->Dense, RNN->Dense, Dense->RNN
    {
        ComputationGraphConfiguration twoInputs = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("inCNN", "inRNN")
                .setInputTypes(InputType.convolutional(28, 28, 1), InputType.recurrent(5))
                //(28-2+0)/2+1 = 14
                .addLayer("cnn", new ConvolutionLayer.Builder()
                        .kernelSize(2, 2).padding(0, 0).stride(2, 2).nOut(3)
                        .build(), "inCNN")
                //(14-2+0)/2+1=7
                .addLayer("pool", new SubsamplingLayer.Builder()
                        .kernelSize(2, 2).padding(0, 0).stride(2, 2)
                        .build(), "cnn")
                .addLayer("dense", new DenseLayer.Builder().nOut(10).build(), "pool")
                .addLayer("dense2", new DenseLayer.Builder().nOut(10).build(), "inRNN")
                .addVertex("merge", new MergeVertex(), "dense", "dense2")
                .addLayer("out", new RnnOutputLayer.Builder().nOut(5).build(), "merge")
                .setOutputs("out")
                .build();

        //Check preprocessors:
        LayerVertex cnn = (LayerVertex) twoInputs.getVertices().get("cnn");
        //Expect no preprocessor: cnn data -> cnn layer
        assertNull(cnn.getPreProcessor());
        LayerVertex pool = (LayerVertex) twoInputs.getVertices().get("pool");
        assertNull(pool.getPreProcessor());
        LayerVertex dense = (LayerVertex) twoInputs.getVertices().get("dense");
        assertTrue(dense.getPreProcessor() instanceof CnnToFeedForwardPreProcessor);
        CnnToFeedForwardPreProcessor cnnToFf = (CnnToFeedForwardPreProcessor) dense.getPreProcessor();
        assertEquals(3, cnnToFf.getNumChannels());
        assertEquals(7, cnnToFf.getInputHeight());
        assertEquals(7, cnnToFf.getInputWidth());
        LayerVertex dense2 = (LayerVertex) twoInputs.getVertices().get("dense2");
        assertTrue(dense2.getPreProcessor() instanceof RnnToFeedForwardPreProcessor);
        LayerVertex out = (LayerVertex) twoInputs.getVertices().get("out");
        assertTrue(out.getPreProcessor() instanceof FeedForwardToRnnPreProcessor);

        //Check nIns:
        assertEquals(7 * 7 * 3, ((FeedForwardLayer) dense.getLayerConf().getLayer()).getNIn());
        assertEquals(5, ((FeedForwardLayer) dense2.getLayerConf().getLayer()).getNIn());
        //10+10 out of the merge vertex -> 20 in to output layer vertex
        assertEquals(20, ((FeedForwardLayer) out.getLayerConf().getLayer()).getNIn());
    }

    //Input to 2 CNN layers:
    {
        ComputationGraphConfiguration twoCnns = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .graphBuilder()
                .addInputs("input")
                .setInputTypes(InputType.convolutional(28, 28, 1))
                .addLayer("cnn_1", new ConvolutionLayer.Builder(2, 2)
                        .stride(2, 2).nIn(1).nOut(3)
                        .build(), "input")
                .addLayer("cnn_2", new ConvolutionLayer.Builder(4, 4)
                        .stride(2, 2).padding(1, 1).nIn(1).nOut(3)
                        .build(), "input")
                .addLayer("max_1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                        .kernelSize(2, 2)
                        .build(), "cnn_1", "cnn_2")
                //.nIn(7 * 7 * 6) is not set on the output layer here
                .addLayer("output", new OutputLayer.Builder().nOut(10).build(), "max_1")
                .setOutputs("output")
                .pretrain(false).backprop(true)
                .build();

        LayerVertex cnn1 = (LayerVertex) twoCnns.getVertices().get("cnn_1");
        //Expect no preprocessor: cnn data -> cnn layer
        assertNull(cnn1.getPreProcessor());
        LayerVertex cnn2 = (LayerVertex) twoCnns.getVertices().get("cnn_2");
        //Expect no preprocessor: cnn data -> cnn layer
        assertNull(cnn2.getPreProcessor());
        assertNull(((LayerVertex) twoCnns.getVertices().get("max_1")).getPreProcessor());
        LayerVertex output = (LayerVertex) twoCnns.getVertices().get("output");
        assertTrue(output.getPreProcessor() instanceof CnnToFeedForwardPreProcessor);
        CnnToFeedForwardPreProcessor cnnToFf = (CnnToFeedForwardPreProcessor) output.getPreProcessor();
        assertEquals(6, cnnToFf.getNumChannels());
        assertEquals(7, cnnToFf.getInputHeight());
        assertEquals(7, cnnToFf.getInputWidth());
    }
}
Also used : LayerVertex(org.deeplearning4j.nn.conf.graph.LayerVertex) CnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor) MergeVertex(org.deeplearning4j.nn.conf.graph.MergeVertex) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Example 4 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class ComputationGraphTestRNN method testRnnTimeStepMultipleInOut.

/**
 * Exercises {@code ComputationGraph.rnnTimeStep} on a graph with two inputs and two outputs.
 *
 * <p>A full forward pass over 12 time steps is taken as the reference. The same input is then fed
 * through {@code rnnTimeStep} in chunks of 1, 2, 3, 4, 6 and 12 steps; after every chunk the two
 * outputs and the stored previous activations of both LSTM layers must equal the matching slice of
 * the reference activations.
 */
@Test
public void testRnnTimeStepMultipleInOut() {
    //Test rnnTimeStep functionality with multiple inputs and outputs...
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;

    //4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors.
    //Network architecture: lstm0 -> Dense -> RnnOutputLayer0
    // and lstm1 -> Dense -> RnnOutputLayer1
    ComputationGraphConfiguration graphConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .graphBuilder()
            .addInputs("in0", "in1")
            .addLayer("lstm0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(5).nOut(6)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "in0")
            .addLayer("lstm1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(4).nOut(5)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "in1")
            .addLayer("dense", new DenseLayer.Builder()
                    .nIn(6 + 5).nOut(9)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "lstm0", "lstm1")
            .addLayer("out0", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .nIn(9).nOut(3)
                    .activation(Activation.SOFTMAX)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "dense")
            .addLayer("out1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .nIn(9).nOut(4)
                    .activation(Activation.SOFTMAX)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "dense")
            .setOutputs("out0", "out1")
            .inputPreProcessor("dense", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("out0", new FeedForwardToRnnPreProcessor())
            .inputPreProcessor("out1", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(graphConf);
    graph.init();

    INDArray input0 = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
    INDArray input1 = Nd4j.rand(new int[] { 3, 4, timeSeriesLength });

    //Reference: activations from one full forward pass over the whole series
    Map<String, INDArray> reference = graph.feedForward(new INDArray[] { input0, input1 }, true);
    INDArray fullActLSTM0 = reference.get("lstm0");
    INDArray fullActLSTM1 = reference.get("lstm1");
    INDArray fullActOut0 = reference.get("out0");
    INDArray fullActOut1 = reference.get("out1");
    assertArrayEquals(new int[] { 3, 6, timeSeriesLength }, fullActLSTM0.shape());
    assertArrayEquals(new int[] { 3, 5, timeSeriesLength }, fullActLSTM1.shape());
    assertArrayEquals(new int[] { 3, 3, timeSeriesLength }, fullActOut0.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullActOut1.shape());

    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    //Should get the same result regardless of step size; should be identical to standard forward pass
    for (int inLength : inputLengths) {
        //nSteps chunks, each of length inLength
        int nSteps = timeSeriesLength / inLength;
        graph.rnnClearPreviousState();

        for (int step = 0; step < nSteps; step++) {
            int startTimeRange = step * inLength;
            int endTimeRange = startTimeRange + inLength;

            INDArray inputSubset0 = input0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1) {
                assertTrue(inputSubset0.size(2) == inLength);
            }
            INDArray inputSubset1 = input1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1) {
                assertTrue(inputSubset1.size(2) == inLength);
            }

            INDArray[] outArr = graph.rnnTimeStep(inputSubset0, inputSubset1);
            assertEquals(2, outArr.length);
            INDArray out0 = outArr[0];
            INDArray out1 = outArr[1];

            INDArray expOutSubset0;
            INDArray expOutSubset1;
            if (inLength == 1) {
                //Single step: copy the point slice into an explicitly 3d array with a time dimension of 1
                int[] sizes0 = new int[] { fullActOut0.size(0), fullActOut0.size(1), 1 };
                expOutSubset0 = Nd4j.create(sizes0);
                expOutSubset0.tensorAlongDimension(0, 1, 0).assign(fullActOut0.get(NDArrayIndex.all(),
                        NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));

                int[] sizes1 = new int[] { fullActOut1.size(0), fullActOut1.size(1), 1 };
                expOutSubset1 = Nd4j.create(sizes1);
                expOutSubset1.tensorAlongDimension(0, 1, 0).assign(fullActOut1.get(NDArrayIndex.all(),
                        NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset0 = fullActOut0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
                expOutSubset1 = fullActOut1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset0, out0);
            assertEquals(expOutSubset1, out1);

            //Stored LSTM activations must match the reference at the last time step of this chunk
            Map<String, INDArray> currLSTM0State = graph.rnnGetPreviousState("lstm0");
            Map<String, INDArray> currLSTM1State = graph.rnnGetPreviousState("lstm1");
            INDArray lastActL0 = currLSTM0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currLSTM1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullActLSTM0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullActLSTM1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Example 5 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class TestVariableLengthTS method testInputMasking.

/**
 * Checks input masking on a network with two dense layers in front of an LSTM and an RNN output
 * layer, for mini-batch sizes 1, 2 and 5.
 *
 * <p>Per mini-batch size the test:
 * <ol>
 *   <li>computes score/gradients for a 4-step input ({@code in1});</li>
 *   <li>computes score/gradients for a 5-step input ({@code in2}) whose first 4 steps equal
 *       {@code in1} and whose 5th step is masked out via the input mask; score and gradients are
 *       expected to differ from step 1;</li>
 *   <li>overwrites the values at the masked 5th step and verifies that score, gradients and
 *       activations are unchanged;</li>
 *   <li>verifies that the activations of the two dense layers are exactly zero at the masked
 *       step.</li>
 * </ol>
 *
 * <p>Both reference gradients ({@code g1} and {@code g2}) are copied with {@code dup()} right after
 * they are obtained. Without the copy of {@code g2}, the comparison against {@code g2a} in step 3
 * could end up comparing a gradient view with itself.
 */
@Test
public void testInputMasking() {
    //Idea: have masking on the input with 2 dense layers on input
    //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked
    int[] miniBatchSizes = { 1, 2, 5 };
    int nIn = 2;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1)
                .updater(Updater.SGD)
                .learningRate(0.1)
                .seed(12345)
                .list()
                .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                .layer(3, new RnnOutputLayer.Builder()
                        .lossFunction(LossFunctions.LossFunction.MSE)
                        .nIn(2).nOut(1)
                        .build())
                .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
                .inputPreProcessor(2, new FeedForwardToRnnPreProcessor())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        //in2 = in1 plus one extra (5th) time step; same construction for the labels
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        //Input mask: all ones except the last (index 4) time step of every example
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] { j, 4 }, 0);
        }

        //Pass 1: unmasked, 4 time steps
        net.setInput(in1);
        net.setLabels(labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map1 = g1.gradientForVariable();
        for (String s : map1.keySet()) {
            //Note: gradients are a view normally -> second computeGradientAndScore would have modified the original gradient map values...
            map1.put(s, map1.get(s).dup());
        }

        //Pass 2: 5 time steps with the last one masked on the input
        net.setInput(in2);
        net.setLabels(labels2);
        net.setLayerMaskArrays(inputMask, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g2map.keySet()) {
            //Same reason as for g1: computeGradientAndScore is called again in the loop further down,
            //so g2 must hold copies or the g2 vs. g2a comparison there would not test anything
            g2map.put(s, g2map.get(s).dup());
        }
        List<INDArray> activations2 = net.feedForward();

        //Scores should differ here: masking the input, not the output. Therefore 4 vs. 5 time step outputs
        assertNotEquals(score1, score2, 0.01);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            System.out.println("-------");
            System.out.println("Variable: " + s);
            System.out.println(Arrays.toString(g1s.dup().data().asFloat()));
            System.out.println(Arrays.toString(g2s.dup().data().asFloat()));
            assertNotEquals(s, g1s, g2s);
        }

        //Modify the values at the masked time step, and check that neither the gradients, score nor activations change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] { j, k, 4 }, r.nextDouble());
            }
            net.setInput(in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }
            List<INDArray> activations2a = net.feedForward();
            //Index 0 is skipped: presumably the (just modified) input itself - TODO confirm
            for (int k = 1; k < activations2.size(); k++) {
                assertEquals(activations2.get(k), activations2a.get(k));
            }
        }

        //Finally: check that the activations for the first two (dense) layers are zero at the appropriate time step
        //The dense activations are 2d; convert them to 3d so the time step can be indexed directly
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get(1);
        INDArray l1Before = activations2.get(2);
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Aggregations

FeedForwardToRnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor): 15; Test (org.junit.Test): 14; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 12; RnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor): 10; NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 8; MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 6; Random (java.util.Random): 5; ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 5; GravesLSTM (org.deeplearning4j.nn.layers.recurrent.GravesLSTM): 5; MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 5; NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 4; RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer): 4; CnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor): 3; ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 3; DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 2; EmbeddingLayer (org.deeplearning4j.nn.conf.layers.EmbeddingLayer): 2; RnnToCnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor): 2; Gradient (org.deeplearning4j.nn.gradient.Gradient): 2; ArrayList (java.util.ArrayList): 1; UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution): 1