
Example 16 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From the class TestDecayPolicies, method testLearningRateExponentialDecaySingleLayer.

@Test
public void testLearningRateExponentialDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.Exponential)
                    .lrPolicyDecayRate(decayRate)
                    .iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcExponentialDecay(lr, decayRate, i);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
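Note that nIn, nOut, weightGradient and biasGradient are fields of the test class, and calcExponentialDecay is a helper defined elsewhere in TestDecayPolicies. A minimal sketch of that helper, assuming the Exponential policy multiplies the base rate by decayRate once per iteration (inferred from this test, not verified against the library source):

private static double calcExponentialDecay(double lr, double decayRate, double iteration) {
    // assumed schedule: lr * decayRate^iteration
    return lr * Math.pow(decayRate, iteration);
}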
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Updater (org.deeplearning4j.nn.api.Updater), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Layer (org.deeplearning4j.nn.api.Layer), OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer), DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer), Test (org.junit.Test)

Example 17 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From the class TestDecayPolicies, method testLearningRateStepDecaySingleLayer.

@Test
public void testLearningRateStepDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double steps = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.Step)
                    .lrPolicyDecayRate(decayRate)
                    .lrPolicySteps(steps)
                    .iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcStepDecay(lr, decayRate, i, steps);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
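As above, calcStepDecay is a helper defined elsewhere in the test class. A minimal sketch, assuming the Step policy decays the rate by a factor of decayRate once every steps iterations (again inferred from this test rather than the library source):

private static double calcStepDecay(double lr, double decayRate, double iteration, double steps) {
    // assumed schedule: lr * decayRate^floor(iteration / steps)
    return lr * Math.pow(decayRate, Math.floor(iteration / steps));
}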
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Updater (org.deeplearning4j.nn.api.Updater), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Layer (org.deeplearning4j.nn.api.Layer), OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer), DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer), Test (org.junit.Test)

Example 18 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From the class TestVariableLengthTS, method testVariableLengthSimple.

@Test
public void testVariableLengthSimple() {
    //Test: a GravesLSTM layer + RnnOutputLayer
    //Case 1: time series of length 4
    //Case 2: time series of length 5, with the last time step's output mask set to 0
    //Expect the same score and gradients in both cases
    int[] miniBatchSizes = { 1, 2, 5 };
    int nOut = 1;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                        .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345)
                        .list()
                        .layer(0, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                        .layer(1, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
                                        .nIn(2).nOut(1).build())
                        .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labelMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            labelMask.putScalar(new int[] { j, 4 }, 0);
        }
        net.setInput(in1);
        net.setLabels(labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        net.setInput(in2);
        net.setLabels(labels2);
        net.setLayerMaskArrays(null, labelMask);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        //Scores and gradients should be identical for the two cases (given the mask array)
        assertEquals(score1, score2, 0.0);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            assertEquals(s, g1s, g2s);
        }
        //Modify the labels at the masked time step, and check that neither (a) the score nor (b) the gradients change
        for (int i = 0; i < nExamples; i++) {
            for (int j = 0; j < nOut; j++) {
                double d = r.nextDouble();
                labels2.putScalar(new int[] { i, j, 4 }, d);
            }
            net.setLabels(labels2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 0.0);
            for (String s : g2map.keySet()) {
                INDArray g2s = g2map.get(s);
                INDArray g2sa = g2a.getGradientFor(s);
                assertEquals(s, g2s, g2sa);
            }
        }
    }
}
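Why the two scores match: with a label (output) mask, each per-time-step loss contribution is weighted by the corresponding mask entry before being summed into the score, so a zero at the padded step removes that step entirely. An illustrative standalone sketch of that weighting (not the library's actual code path; shapes and loss values are hypothetical):

// A [miniBatch, timeSteps] label mask zeroes out the padded step's loss,
// so the padded 5-step series scores exactly like the 4-step one.
INDArray perStepLoss = Nd4j.rand(3, 5);      // hypothetical per-example, per-step loss
INDArray mask = Nd4j.ones(3, 5);
mask.putScalar(new int[] { 0, 4 }, 0);       // mask time step 4 of example 0
double maskedScore = perStepLoss.mul(mask).sumNumber().doubleValue();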
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Test (org.junit.Test)

Example 19 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From the class TestVariableLengthTS, method testInputMasking.

@Test
public void testInputMasking() {
    //Idea: mask the input, with two dense layers operating on the input
    //Ensure that the parameter gradients do not depend on the input values at masked time steps
    int[] miniBatchSizes = { 1, 2, 5 };
    int nIn = 2;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                        .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345)
                        .list()
                        .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                        .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                        .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                        .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
                                        .nIn(2).nOut(1).build())
                        .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
                        .inputPreProcessor(2, new FeedForwardToRnnPreProcessor())
                        .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] { j, 4 }, 0);
        }
        net.setInput(in1);
        net.setLabels(labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map1 = g1.gradientForVariable();
        for (String s : map1.keySet()) {
            //Note: gradients are normally views; a second computeGradientAndScore() would modify the original gradient map values
            map1.put(s, map1.get(s).dup());
        }
        net.setInput(in2);
        net.setLabels(labels2);
        net.setLayerMaskArrays(inputMask, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        List<INDArray> activations2 = net.feedForward();
        //Scores should differ here: we are masking the input, not the output, so 4 vs. 5 time steps contribute to the score
        assertNotEquals(score1, score2, 0.01);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            System.out.println("-------");
            System.out.println("Variable: " + s);
            System.out.println(Arrays.toString(g1s.dup().data().asFloat()));
            System.out.println(Arrays.toString(g2s.dup().data().asFloat()));
            assertNotEquals(s, g1s, g2s);
        }
        //Modify the values at the masked time step, and check that neither the gradients, the score, nor the activations change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] { j, k, 4 }, r.nextDouble());
            }
            net.setInput(in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }
            List<INDArray> activations2a = net.feedForward();
            for (int k = 1; k < activations2.size(); k++) {
                assertEquals(activations2.get(k), activations2a.get(k));
            }
        }
        //Finally: check that the activations for the first two (dense) layers are zero at the appropriate time step
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get(1);
        INDArray l1Before = activations2.get(2);
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
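The final loop indexes time step 4 of the dense-layer activations after converting them back to the RNN layout. A minimal sketch of the reshape FeedForwardToRnnPreProcessor performs here, under the shape conventions this test assumes (2d [miniBatch * timeSteps, size] in, 3d [miniBatch, size, timeSteps] out):

FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
INDArray ff = Nd4j.rand(3 * 5, 2);                     // 3 examples x 5 time steps, layer size 2
INDArray rnn = proc.preProcess(ff, 3);                 // miniBatchSize = 3
assertArrayEquals(new int[] { 3, 2, 5 }, rnn.shape()); // back in [miniBatch, size, timeSteps] layout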
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), RnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), FeedForwardToRnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor), Test (org.junit.Test)

Example 20 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From the class MultiLayerTest, method testBackpropGradient.

@Test
public void testBackpropGradient() {
    //Testing: MultiLayerNetwork.backpropGradient()
    //i.e., specifically without an output layer
    int nIn = 10;
    int nOut = 40;
    int miniBatch = 5;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(0.1)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).activation(Activation.RELU)
                                    .weightInit(WeightInit.XAVIER).build())
                    .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).activation(Activation.RELU)
                                    .weightInit(WeightInit.XAVIER).build())
                    .layer(2, new DenseLayer.Builder().nIn(30).nOut(nOut).activation(Activation.RELU)
                                    .weightInit(WeightInit.XAVIER).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    Nd4j.getRandom().setSeed(12345);
    INDArray eps = Nd4j.rand(miniBatch, nOut);
    INDArray input = Nd4j.rand(miniBatch, nIn);
    //Need to feed forward before backprop
    net.feedForward(input);
    Pair<Gradient, INDArray> pair = net.backpropGradient(eps);
    INDArray epsOut = pair.getSecond();
    assertNotNull(epsOut);
    assertArrayEquals(new int[] { miniBatch, nIn }, epsOut.shape());
    Gradient g = pair.getFirst();
    Map<String, INDArray> gradMap = g.gradientForVariable();
    //3 layers, weight + bias gradients for each
    assertEquals(6, gradMap.size());
    //Weight and bias gradient keys for each of the three layers
    String[] expKeys = { "0_" + DefaultParamInitializer.WEIGHT_KEY, "0_" + DefaultParamInitializer.BIAS_KEY,
                    "1_" + DefaultParamInitializer.WEIGHT_KEY, "1_" + DefaultParamInitializer.BIAS_KEY,
                    "2_" + DefaultParamInitializer.WEIGHT_KEY, "2_" + DefaultParamInitializer.BIAS_KEY };
    Set<String> keys = gradMap.keySet();
    for (String s : expKeys) {
        assertTrue(keys.contains(s));
    }
/*
        System.out.println(pair);
        
        //Use updater to go from raw gradients -> updates
        //Apply learning rate, gradient clipping, adagrad/momentum/rmsprop etc
        Updater updater = UpdaterCreator.getUpdater(net);
        updater.update(net, g, 0, miniBatch);
        
        StepFunction stepFunction = new NegativeGradientStepFunction();
        INDArray params = net.params();
        System.out.println(Arrays.toString(params.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 10)).dup().data().asFloat()));
        stepFunction.step(params, g.gradient());
        net.setParams(params);    //params() may not be in-place
        System.out.println(Arrays.toString(params.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 10)).dup().data().asFloat()));
        */
}
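For reference, the Gradient returned by backpropGradient is essentially a map from parameter names (of the form layerIndex_paramType) to gradient arrays. A minimal standalone sketch of the interface exercised throughout these examples (the shapes are illustrative only):

Gradient g = new DefaultGradient();
g.setGradientFor("0_" + DefaultParamInitializer.WEIGHT_KEY, Nd4j.zeros(10, 20));
g.setGradientFor("0_" + DefaultParamInitializer.BIAS_KEY, Nd4j.zeros(1, 20));
INDArray w0 = g.getGradientFor("0_" + DefaultParamInitializer.WEIGHT_KEY); // one parameter's gradient
Map<String, INDArray> byVar = g.gradientForVariable();                     // name -> gradient map
INDArray flat = g.gradient();                                              // all gradients flattened into a single row vector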
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Test (org.junit.Test)

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient): 105
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 100
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 72
Test (org.junit.Test): 52
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 35
Pair (org.deeplearning4j.berkeley.Pair): 28
Layer (org.deeplearning4j.nn.api.Layer): 28
Updater (org.deeplearning4j.nn.api.Updater): 25
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 24
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 21
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 9
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 8
IActivation (org.nd4j.linalg.activations.IActivation): 6
HashMap (java.util.HashMap): 5
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 5
ArrayList (java.util.ArrayList): 4
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 4
DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException): 4
IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer): 4
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 4