Search in sources :

Example 51 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestVariableLengthTSCG, method testInputMasking.

/**
 * Checks that masking the input of a {@code ComputationGraph} makes the score, the parameter
 * gradients and the activations independent of the input values at the masked step.
 *
 * <p>For each minibatch size the test:
 * <ol>
 *   <li>computes score and gradients for an unmasked input of shape [nExamples, 2, 4];</li>
 *   <li>computes them again for an input of shape [nExamples, 2, 5] whose first four steps are
 *       identical and whose index 4 is masked out via an input mask;</li>
 *   <li>overwrites the values at the masked index with random numbers and asserts that score,
 *       gradients and activations are unchanged;</li>
 *   <li>asserts that the activations of layers "0" and "1" are exactly zero at the masked index.</li>
 * </ol>
 *
 * <p>Gradient arrays returned by the network are views that get overwritten by the next
 * {@code computeGradientAndScore()} call, so every gradient that is compared against a later
 * computation is duplicated first.
 */
@Test
public void testInputMasking() {
    //Idea: have masking on the input with 2 dense layers on input
    //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked
    int[] miniBatchSizes = { 1, 2, 5 };
    int nIn = 2;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1)
                .updater(Updater.SGD)
                .learningRate(0.1)
                .seed(12345)
                .graphBuilder()
                .addInputs("in")
                .addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in")
                .addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "0")
                .addLayer("2", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "1")
                .addLayer("3", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build(), "2")
                .setOutputs("3")
                .inputPreProcessor("0", new RnnToFeedForwardPreProcessor())
                .inputPreProcessor("2", new FeedForwardToRnnPreProcessor())
                .build();
        ComputationGraph net = new ComputationGraph(conf);
        net.init();

        //in2 is in1 with one extra trailing step appended; the first 4 steps are copied from in1
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        //Same construction for the labels
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        //Mask: all ones except index 4 of every example
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] { j, 4 }, 0);
        }

        //Pass 1: unmasked, 4-step input
        net.setInput(0, in1);
        net.setLabel(0, labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map = g1.gradientForVariable();
        for (String s : map.keySet()) {
            //Gradients are views; need to dup otherwise they will be modified by next computeGradientAndScore
            map.put(s, map.get(s).dup());
        }

        //Pass 2: masked, 5-step input
        net.setInput(0, in2);
        net.setLabel(0, labels2);
        net.setLayerMaskArrays(new INDArray[] { inputMask }, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g2map.keySet()) {
            //Same reason as for g1: without the dup, the g2 vs g2a comparison below would compare
            //the gradient buffer with itself and could never fail
            g2map.put(s, g2map.get(s).dup());
        }
        Map<String, INDArray> activations2 = net.feedForward();

        //Scores should differ here: masking the input, not the output. Therefore 4 vs. 5 time step outputs
        assertNotEquals(score1, score2, 0.01);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            assertNotEquals(s, g1s, g2s);
        }

        //Modify the values at the masked time step, and check that neither the gradients, score or activations change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] { j, k, 4 }, r.nextDouble());
            }
            net.setInput(0, in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }
            Map<String, INDArray> activations2a = net.feedForward();
            for (String s : activations2.keySet()) {
                assertEquals(activations2.get(s), activations2a.get(s));
            }
        }

        //Finally: check that the activations for the first two (dense) layers are zero at the appropriate time step
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get("0");
        INDArray l1Before = activations2.get("1");
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Gradient(org.deeplearning4j.nn.gradient.Gradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) Random(java.util.Random) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Example 52 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestComputationGraphNetwork, method testGradientUpdate.

/**
 * Exercises {@code ComputationGraph.update(Gradient)} and {@code setParam} on a small
 * two-layer graph ("first": 4 -> 5 dense, "output": 5 -> 3 output layer).
 *
 * <p>The test first verifies that, after a single fit, the stored gradient differs from an
 * all-ones gradient and that {@code update} replaces it. It then sets every parameter to ones,
 * confirms the flattened parameter vector, applies the all-ones gradient once more and expects
 * every parameter to equal two.
 */
@Test
public void testGradientUpdate() {
    // Parameter keys and the (rows, columns) of the matching all-ones arrays
    String[] paramKeys = { "first_W", "first_b", "output_W", "output_b" };
    int[][] paramShapes = { { 4, 5 }, { 1, 5 }, { 5, 3 }, { 1, 3 } };
    // (4 * 5 + 5) + (5 * 3 + 3) parameters in total
    int totalParams = 43;

    DataSetIterator irisIter = new IrisDataSetIterator(1, 1);

    Gradient onesGradient = new DefaultGradient();
    for (int i = 0; i < paramKeys.length; i++) {
        onesGradient.setGradientFor(paramKeys[i], Nd4j.ones(paramShapes[i][0], paramShapes[i][1]));
    }

    ComputationGraphConfiguration graphConf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .graphBuilder()
            .addInputs("input")
            .addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
            .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).build(), "first")
            .setOutputs("output")
            .pretrain(false)
            .backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(graphConf);
    graph.init();
    graph.fit(irisIter.next());

    // After fitting, the stored gradient must not already be all ones...
    Gradient current = graph.gradient;
    assertNotEquals(onesGradient.getGradientFor("first_W"), current.getGradientFor("first_W"));

    // ...but it must be once update(...) has been applied
    graph.update(onesGradient);
    current = graph.gradient;
    assertEquals(onesGradient.getGradientFor("first_W"), current.getGradientFor("first_W"));

    // Update params with set
    for (int i = 0; i < paramKeys.length; i++) {
        graph.setParam(paramKeys[i], Nd4j.ones(paramShapes[i][0], paramShapes[i][1]));
    }

    // Confirm params
    INDArray flatParams = graph.params();
    assertEquals(Nd4j.ones(1, totalParams), flatParams);

    // Applying the all-ones gradient is expected to leave every parameter at 2
    graph.update(onesGradient);
    flatParams = graph.params();
    assertEquals(Nd4j.ones(1, totalParams).addi(1), flatParams);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) RecordReaderMultiDataSetIterator(org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator) MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) Test(org.junit.Test)

Example 53 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestComputationGraphNetwork, method testBackwardIrisBasic.

/**
 * Verifies that a {@code ComputationGraph} and a {@code MultiLayerNetwork} built from the
 * Iris configurations produce the same score and the same per-parameter gradients when they
 * are given identical parameters, inputs and labels.
 */
@Test
public void testBackwardIrisBasic() {
    ComputationGraph cg = new ComputationGraph(getIrisGraphConfiguration());
    cg.init();

    MultiLayerNetwork mln = new MultiLayerNetwork(getIrisMLNConfiguration());
    mln.init();

    DataSetIterator irisIter = new IrisDataSetIterator(150, 150);
    DataSet batch = irisIter.next();

    //Now: set parameters of both networks to be identical. Then feedforward, and check we get the same outputs
    Nd4j.getRandom().setSeed(12345);
    int firstLayerParams = 4 * 5 + 5;
    int outputLayerParams = 5 * 3 + 3;
    INDArray sharedParams = Nd4j.rand(1, firstLayerParams + outputLayerParams);
    cg.setParams(sharedParams.dup());
    mln.setParams(sharedParams.dup());

    // Each network gets its own copy of the features and labels
    INDArray features = batch.getFeatureMatrix();
    INDArray targets = batch.getLabels();
    cg.setInput(0, features.dup());
    cg.setLabel(0, targets.dup());
    mln.setInput(features.dup());
    mln.setLabels(targets.dup());

    //Compute gradients
    mln.computeGradientAndScore();
    Pair<Gradient, Double> mlnResult = mln.gradientAndScore();
    cg.computeGradientAndScore();
    Pair<Gradient, Double> cgResult = cg.gradientAndScore();
    assertEquals(mlnResult.getSecond(), cgResult.getSecond(), 1e-3);

    //Compare gradients
    Gradient mlnGradient = mlnResult.getFirst();
    Gradient cgGradient = cgResult.getFirst();
    assertNotNull(cgGradient);
    assertEquals(mlnGradient.gradientForVariable().size(), cgGradient.gradientForVariable().size());

    // { MultiLayerNetwork key, ComputationGraph key } for the same parameter
    String[][] keyPairs = {
            { "0_W", "firstLayer_W" },
            { "0_b", "firstLayer_b" },
            { "1_W", "outputLayer_W" },
            { "1_b", "outputLayer_b" } };
    for (String[] pair : keyPairs) {
        assertEquals(mlnGradient.getGradientFor(pair[0]), cgGradient.getGradientFor(pair[1]));
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) RecordReaderMultiDataSetIterator(org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator) MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) Test(org.junit.Test)

Example 54 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class SubsamplingLayerTest, method createPrevGradient.

/**
 * Builds a placeholder gradient for the tests in this class.
 *
 * <p>A single all-ones array of shape [nExamples, nChannelsIn, inputHeight, inputWidth] is
 * registered under both the bias key and the weight key (bias first); both entries refer to
 * the same array instance.
 *
 * @return a {@code DefaultGradient} holding the two entries
 */
private Gradient createPrevGradient() {
    Gradient prev = new DefaultGradient();
    INDArray allOnes = Nd4j.ones(nExamples, nChannelsIn, inputHeight, inputWidth);
    String[] keysInOrder = { DefaultParamInitializer.BIAS_KEY, DefaultParamInitializer.WEIGHT_KEY };
    for (String key : keysInOrder) {
        prev.gradientForVariable().put(key, allOnes);
    }
    return prev;
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray)

Example 55 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class SubsamplingLayerTest, method testSubSampleLayerNoneBackprop.

/**
 * Backprop through a subsampling layer configured with {@code PoolingType.NONE}.
 *
 * <p>With an all-ones epsilon of shape [1, 2, 2, 2] the test expects the returned epsilon to
 * equal an all-ones array of the same shape, and no gradient to be registered under "W".
 * A second pass with the class-level data and epsilon checks that dimension 1 of the
 * returned epsilon equals {@code depth}.
 */
@Test
public void testSubSampleLayerNoneBackprop() throws Exception {
    // Input epsilon and expected result are built as two separate arrays with the same content
    INDArray epsilonIn = Nd4j.create(
            new double[] { 1., 1., 1., 1., 1., 1., 1., 1. },
            new int[] { 1, 2, 2, 2 });
    INDArray epsilonExpected = Nd4j.create(
            new double[] { 1., 1., 1., 1., 1., 1., 1., 1. },
            new int[] { 1, 2, 2, 2 });

    INDArray containedInput = getContainedData();
    Layer poolNone = getSubsamplingLayer(SubsamplingLayer.PoolingType.NONE);
    poolNone.setInput(containedInput);

    Pair<Gradient, INDArray> containedResult = poolNone.backpropGradient(epsilonIn);
    INDArray epsilonOut = containedResult.getSecond();
    assertEquals(epsilonExpected, epsilonOut);
    assertEquals(null, containedResult.getFirst().getGradientFor("W"));
    assertEquals(epsilonExpected.shape().length, containedResult.getSecond().shape().length);

    // Second pass on the class-level data set
    INDArray fullInput = getData();
    poolNone.activate(fullInput);
    Pair<Gradient, INDArray> fullResult = poolNone.backpropGradient(epsilon);
    // depth retained
    assertEquals(depth, fullResult.getSecond().size(1));
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) SubsamplingLayer(org.deeplearning4j.nn.conf.layers.SubsamplingLayer) Test(org.junit.Test)

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient)105 INDArray (org.nd4j.linalg.api.ndarray.INDArray)100 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)72 Test (org.junit.Test)52 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)35 Pair (org.deeplearning4j.berkeley.Pair)28 Layer (org.deeplearning4j.nn.api.Layer)28 Updater (org.deeplearning4j.nn.api.Updater)25 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)24 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)21 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)9 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)8 IActivation (org.nd4j.linalg.activations.IActivation)6 HashMap (java.util.HashMap)5 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)5 ArrayList (java.util.ArrayList)4 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)4 DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException)4 IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)4 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)4