
Example 61 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class ComputationGraphTestRNN method testTruncatedBPTTVsBPTT.

@Test
public void testTruncatedBPTTVsBPTT() {
    //Under some (limited) circumstances, we expect BPTT and truncated BPTT to be identical
    //Specifically: TBPTT where the truncation length equals the full time series length
    int timeSeriesLength = 12;
    int miniBatchSize = 7;
    int nIn = 5;
    int nOut = 4;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "0")
            .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION).nIn(8).nOut(nOut).activation(Activation.SOFTMAX)
                    .dist(new NormalDistribution(0, 0.5)).build(), "1")
            .setOutputs("out")
            .backprop(true)
            .build();
    assertEquals(BackpropType.Standard, conf.getBackpropType());
    ComputationGraphConfiguration confTBPTT = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "0")
            .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION).nIn(8).nOut(nOut).activation(Activation.SOFTMAX)
                    .dist(new NormalDistribution(0, 0.5)).build(), "1")
            .setOutputs("out")
            .backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(timeSeriesLength)
            .tBPTTBackwardLength(timeSeriesLength)
            .build();
    assertEquals(BackpropType.TruncatedBPTT, confTBPTT.getBackpropType());
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graphTBPTT = new ComputationGraph(confTBPTT);
    graphTBPTT.init();
    assertTrue(graphTBPTT.getConfiguration().getBackpropType() == BackpropType.TruncatedBPTT);
    assertTrue(graphTBPTT.getConfiguration().getTbpttFwdLength() == timeSeriesLength);
    assertTrue(graphTBPTT.getConfiguration().getTbpttBackLength() == timeSeriesLength);
    INDArray inputData = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    INDArray labels = Nd4j.rand(new int[] { miniBatchSize, nOut, timeSeriesLength });
    graph.setInput(0, inputData);
    graph.setLabel(0, labels);
    graphTBPTT.setInput(0, inputData);
    graphTBPTT.setLabel(0, labels);
    graph.computeGradientAndScore();
    graphTBPTT.computeGradientAndScore();
    Pair<Gradient, Double> graphPair = graph.gradientAndScore();
    Pair<Gradient, Double> graphTbpttPair = graphTBPTT.gradientAndScore();
    assertEquals(graphPair.getFirst().gradientForVariable(), graphTbpttPair.getFirst().gradientForVariable());
    assertEquals(graphPair.getSecond(), graphTbpttPair.getSecond());
    //Check states: the stateMap should be empty, but the tBpttStateMap should not be
    Map<String, INDArray> l0StateMLN = graph.rnnGetPreviousState(0);
    Map<String, INDArray> l0StateTBPTT = graphTBPTT.rnnGetPreviousState(0);
    Map<String, INDArray> l1StateMLN = graph.rnnGetPreviousState(1);
    Map<String, INDArray> l1StateTBPTT = graphTBPTT.rnnGetPreviousState(1);
    Map<String, INDArray> l0TBPTTState = ((BaseRecurrentLayer<?>) graph.getLayer(0)).rnnGetTBPTTState();
    Map<String, INDArray> l0TBPTTStateTBPTT = ((BaseRecurrentLayer<?>) graphTBPTT.getLayer(0)).rnnGetTBPTTState();
    Map<String, INDArray> l1TBPTTState = ((BaseRecurrentLayer<?>) graph.getLayer(1)).rnnGetTBPTTState();
    Map<String, INDArray> l1TBPTTStateTBPTT = ((BaseRecurrentLayer<?>) graphTBPTT.getLayer(1)).rnnGetTBPTTState();
    assertTrue(l0StateMLN.isEmpty());
    assertTrue(l0StateTBPTT.isEmpty());
    assertTrue(l1StateMLN.isEmpty());
    assertTrue(l1StateTBPTT.isEmpty());
    assertTrue(l0TBPTTState.isEmpty());
    assertTrue(l0TBPTTStateTBPTT.size() == 2);
    assertTrue(l1TBPTTState.isEmpty());
    assertTrue(l1TBPTTStateTBPTT.size() == 2);
    INDArray tbpttActL0 = l0TBPTTStateTBPTT.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    INDArray tbpttActL1 = l1TBPTTStateTBPTT.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    Map<String, INDArray> activations = graph.feedForward(inputData, true);
    INDArray l0Act = activations.get("0");
    INDArray l1Act = activations.get("1");
    INDArray expL0Act = l0Act.tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    INDArray expL1Act = l1Act.tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    assertEquals(tbpttActL0, expL0Act);
    assertEquals(tbpttActL1, expL1Act);
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Gradient(org.deeplearning4j.nn.gradient.Gradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) BaseRecurrentLayer(org.deeplearning4j.nn.layers.recurrent.BaseRecurrentLayer) Test(org.junit.Test)
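
For orientation, here is a minimal sketch isolating the TBPTT-specific builder calls (a simplified one-LSTM configuration with placeholder sizes, not the test's exact graph); everything else between the two configurations above is identical.

ComputationGraphConfiguration tbpttConf = new NeuralNetConfiguration.Builder().seed(12345)
        .graphBuilder()
        .addInputs("in")
        .addLayer("lstm", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                .nIn(5).nOut(7).activation(Activation.TANH).build(), "in")
        .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .nIn(7).nOut(4).activation(Activation.SOFTMAX).build(), "lstm")
        .setOutputs("out")
        .backprop(true)
        .backpropType(BackpropType.TruncatedBPTT) //default is BackpropType.Standard
        .tBPTTForwardLength(12) //forward segment length; here equal to the full series length
        .tBPTTBackwardLength(12) //backward segment length, typically equal to the forward length
        .build();

When both segment lengths cover the whole series, truncated BPTT processes a single segment per fit, which is why the gradients and scores asserted above match exactly.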

Example 62 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class ComputationGraphTestRNN method testTBPTTLongerThanTS.

@Test
public void testTBPTTLongerThanTS() {
    int tbpttLength = 100;
    int timeSeriesLength = 20;
    int miniBatchSize = 7;
    int nIn = 5;
    int nOut = 4;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "0")
            .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION).nIn(8).nOut(nOut).activation(Activation.SOFTMAX)
                    .dist(new NormalDistribution(0, 0.5)).build(), "1")
            .setOutputs("out")
            .pretrain(false)
            .backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTBackwardLength(tbpttLength)
            .tBPTTForwardLength(tbpttLength)
            .build();
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    INDArray inputLong = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    INDArray labelsLong = Nd4j.rand(new int[] { miniBatchSize, nOut, timeSeriesLength });
    INDArray initialParams = graph.params().dup();
    graph.fit(new INDArray[] { inputLong }, new INDArray[] { labelsLong });
    INDArray afterParams = graph.params();
    assertNotEquals(initialParams, afterParams);
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
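
The fit above passes raw input/label arrays; the equivalent call through a MultiDataSet, as a hedged sketch (not part of the test), would be:

org.nd4j.linalg.dataset.MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(
        new INDArray[] { inputLong }, new INDArray[] { labelsLong });
graph.fit(mds);

Since tbpttLength (100) exceeds timeSeriesLength (20), the single truncated segment spans the entire sequence, so the test only needs to assert that fitting changed the parameters.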

Example 63 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class ComputationGraphTestRNN method testRnnTimeStepGravesLSTM.

@Test
public void testRnnTimeStepGravesLSTM() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;
    //4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors.
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "0")
            .addLayer("2", new DenseLayer.Builder().nIn(8).nOut(9)
                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5)).build(), "1")
            .addLayer("3", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .weightInit(WeightInit.DISTRIBUTION).nIn(9).nOut(4).activation(Activation.SOFTMAX)
                    .dist(new NormalDistribution(0, 0.5)).build(), "2")
            .setOutputs("3")
            .inputPreProcessor("2", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("3", new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
    Map<String, INDArray> allOutputActivations = graph.feedForward(input, true);
    INDArray fullOutL0 = allOutputActivations.get("0");
    INDArray fullOutL1 = allOutputActivations.get("1");
    INDArray fullOutL3 = allOutputActivations.get("3");
    assertArrayEquals(new int[] { 3, 7, timeSeriesLength }, fullOutL0.shape());
    assertArrayEquals(new int[] { 3, 8, timeSeriesLength }, fullOutL1.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullOutL3.shape());
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    //Should get the same result regardless of step size; should be identical to standard forward pass
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        //split the series into nSteps subsets, each of length inLength
        int nSteps = timeSeriesLength / inLength;
        graph.rnnClearPreviousState();
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertTrue(inputSubset.size(2) == inLength);
            INDArray[] outArr = graph.rnnTimeStep(inputSubset);
            assertEquals(1, outArr.length);
            INDArray out = outArr[0];
            INDArray expOutSubset;
            if (inLength == 1) {
                int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
                expOutSubset = Nd4j.create(sizes);
                expOutSubset.tensorAlongDimension(0, 1, 0).assign(fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset, out);
            Map<String, INDArray> currL0State = graph.rnnGetPreviousState("0");
            Map<String, INDArray> currL1State = graph.rnnGetPreviousState("1");
            INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Also used : NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)
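
The same rnnTimeStep() machinery supports streaming inference one timestep at a time; a minimal sketch assuming the graph and input defined in this test:

graph.rnnClearPreviousState(); //reset stored state before starting a new sequence
for (int t = 0; t < timeSeriesLength; t++) {
    //one timestep, kept 3d as [miniBatch, nIn, 1], matching the test's inputSubset
    INDArray step = input.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(t, t + 1));
    INDArray out = graph.rnnTimeStep(step)[0]; //LSTM state is carried over between calls
}

Because the stored state makes each call depend on the previous one, the loop reproduces the full feedForward() output step by step, exactly as the assertions in the test verify.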

Example 64 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class TestConvolutionModes method testStrictTruncateConvolutionModeCompGraph.

@Test
public void testStrictTruncateConvolutionModeCompGraph() {
    //Idea: with convolution mode == Truncate, input size shouldn't matter (within the bounds of truncated edge),
    // and edge data shouldn't affect the output
    //Use: 9x9, kernel 3, stride 3, padding 0
    // Should get same output for 10x10 and 11x11...
    Nd4j.getRandom().setSeed(12345);
    int[] minibatches = { 1, 3 };
    int[] inDepths = { 1, 3 };
    int[] inSizes = { 9, 10, 11 };
    for (boolean isSubsampling : new boolean[] { false, true }) {
        for (int minibatch : minibatches) {
            for (int inDepth : inDepths) {
                INDArray origData = Nd4j.rand(new int[] { minibatch, inDepth, 9, 9 });
                for (int inSize : inSizes) {
                    for (ConvolutionMode cm : new ConvolutionMode[] { ConvolutionMode.Strict, ConvolutionMode.Truncate }) {
                        INDArray inputData = Nd4j.rand(new int[] { minibatch, inDepth, inSize, inSize });
                        inputData.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 9), NDArrayIndex.interval(0, 9)).assign(origData);
                        Layer layer;
                        if (isSubsampling) {
                            layer = new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0).build();
                        } else {
                            layer = new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0).nOut(3).build();
                        }
                        ComputationGraph net = null;
                        try {
                            ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                                    .weightInit(WeightInit.XAVIER)
                                    .convolutionMode(cm)
                                    .graphBuilder()
                                    .addInputs("in")
                                    .addLayer("0", layer, "in")
                                    .addLayer("1", new OutputLayer.Builder()
                                            .lossFunction(LossFunctions.LossFunction.MCXENT)
                                            .nOut(3).build(), "0")
                                    .setOutputs("1")
                                    .setInputTypes(InputType.convolutional(inSize, inSize, inDepth))
                                    .build();
                            net = new ComputationGraph(conf);
                            net.init();
                            if (inSize > 9 && cm == ConvolutionMode.Strict) {
                                fail("Expected exception");
                            }
                        } catch (DL4JException e) {
                            if (inSize == 9 || cm != ConvolutionMode.Strict) {
                                e.printStackTrace();
                                fail("Unexpected exception");
                            }
                            //Expected exception
                            continue;
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail("Unexpected exception");
                        }
                        INDArray out = net.outputSingle(origData);
                        INDArray out2 = net.outputSingle(inputData);
                        assertEquals(out, out2);
                    }
                }
            }
        }
    }
}
Also used : DL4JException(org.deeplearning4j.exception.DL4JException) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ConvolutionLayer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) ConvolutionMode(org.deeplearning4j.nn.conf.ConvolutionMode) Test(org.junit.Test)
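
The pass/fail pattern in the try/catch follows from the convolution output-size arithmetic; here is that calculation as a hypothetical helper (a sketch for illustration, not DL4J library code):

//Strict requires the kernel and stride to tile the input exactly; Truncate floors instead
static int outSize(int in, int kernel, int stride, int padding, ConvolutionMode cm) {
    int span = in + 2 * padding - kernel;
    if (cm == ConvolutionMode.Strict && span % stride != 0)
        throw new IllegalStateException("Strict mode: kernel/stride do not tile the input");
    return span / stride + 1; //integer division discards the truncated edge
}

//9x9, kernel 3, stride 3, padding 0: (9 - 3) / 3 + 1 = 3 in both modes
//10x10: (10 - 3) % 3 != 0, so Strict throws; Truncate gives (10 - 3) / 3 + 1 = 3
//11x11: Strict throws again; Truncate still yields 3, so only the top-left 9x9 region affects the output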

Example 65 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class TestCustomLayers method testJsonComputationGraph.

@Test
public void testJsonComputationGraph() {
    //ComputationGraph with a custom layer; check JSON and YAML config actually works...
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.1)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
            .addLayer("1", new CustomLayer(3.14159), "0")
            .addLayer("2", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(10).nOut(10).build(), "1")
            .setOutputs("2")
            .pretrain(false)
            .backprop(true)
            .build();
    String json = conf.toJson();
    String yaml = conf.toYaml();
    System.out.println(json);
    ComputationGraphConfiguration confFromJson = ComputationGraphConfiguration.fromJson(json);
    assertEquals(conf, confFromJson);
    ComputationGraphConfiguration confFromYaml = ComputationGraphConfiguration.fromYaml(yaml);
    assertEquals(conf, confFromYaml);
}
Also used : CustomLayer(org.deeplearning4j.nn.layers.custom.testclasses.CustomLayer) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
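
Note that assertEquals(conf, confFromJson) can only pass if every layer in the graph, CustomLayer included, implements value-based equality over its configuration state. A hedged sketch of that requirement (the field name is hypothetical; the actual test class in org.deeplearning4j.nn.layers.custom.testclasses may differ):

@Override
public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof CustomLayer)) return false;
    //compare exactly the state that toJson()/fromJson() round-trips
    return Double.compare(((CustomLayer) o).someParameter, someParameter) == 0;
}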

Aggregations

ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 96 usages
Test (org.junit.Test): 84 usages
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 63 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 51 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 50 usages
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 28 usages
DataSet (org.nd4j.linalg.dataset.DataSet): 22 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 20 usages
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 17 usages
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener): 17 usages
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 17 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 14 usages
Random (java.util.Random): 13 usages
ParameterAveragingTrainingMaster (org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster): 11 usages
InMemoryModelSaver (org.deeplearning4j.earlystopping.saver.InMemoryModelSaver): 10 usages
MaxEpochsTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxEpochsTerminationCondition): 10 usages
MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet): 10 usages
MaxTimeIterationTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxTimeIterationTerminationCondition): 9 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 9 usages
RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer): 9 usages