Search in sources :

Example 71 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestDecayPolicies, method testLearningRateScheduleMLN:

/**
 * Checks the {@code Schedule} learning-rate policy on a two-layer {@link MultiLayerNetwork}
 * for every updater in {@code updaters}.
 *
 * <p>The schedule maps iteration 1 to a learning rate of 0.2; the base rate is 1e-2. For each
 * iteration an all-ones gradient is pushed through the network updater, the per-updater helper
 * verifies the resulting gradient values, and finally the learning rate stored on the output
 * layer's configuration for parameter {@code "W"} is compared against the expected rate.
 *
 * <p>The final comparison uses a small fixed tolerance. Previously the expected value itself
 * was passed as the {@code delta}, which let almost any learning rate pass.
 */
@Test
public void testLearningRateScheduleMLN() {
    // Learning rates are plain doubles copied from configuration, so a tight tolerance suffices.
    final double lrTolerance = 1e-8;

    Map<Integer, Double> learningRateAfter = new HashMap<>();
    learningRateAfter.put(1, 0.2);
    int iterations = 2;
    int[] nIns = { 4, 2 };
    int[] nOuts = { 2, 3 };
    for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
        double lr = 1e-2;
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Schedule)
                .learningRateSchedule(learningRateAfter)
                .iterations(iterations)
                .updater(updaterFunc)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build())
                .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).build())
                .backprop(true)
                .pretrain(false)
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        Updater updater = UpdaterCreator.getUpdater(net);
        for (int i = 0; i < iterations; i++) {
            // Two identical all-ones gradients: one is transformed by the updater under test,
            // the other is handed to the helper as the starting point for the expected values.
            Gradient gradientActual = new DefaultGradient();
            Gradient gradientExpected = new DefaultGradient();
            for (int k = 0; k < net.getnLayers(); k++) {
                String wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
                gradientActual.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
                gradientExpected.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
                String bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
                gradientActual.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
                gradientExpected.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
            }
            updater.update(net, gradientActual, i, 1);

            // NOTE(review): each helper is defined outside this snippet; presumably it returns
            // the learning rate expected to be in effect at iteration i - confirm.
            if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD)) {
                lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            } else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD)) {
                lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            } else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM)) {
                lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            } else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP)) {
                lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            }

            // The third argument of assertEquals is the maximum allowed absolute difference,
            // so it must be a small tolerance rather than the expected value.
            assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), lrTolerance);
        }
    }
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) HashMap(java.util.HashMap) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Updater(org.deeplearning4j.nn.api.Updater) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Example 72 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestDecayPolicies, method testLearningRatePolyDecaySingleLayer:

/**
 * Exercises the {@code Poly} learning-rate decay policy on a single dense layer that uses
 * the SGD updater.
 *
 * <p>After each call to the updater, the learning rates stored on the layer configuration for
 * parameters {@code "W"} and {@code "b"} must match the value produced by
 * {@code calcPolyDecay} for that iteration, to within 1e-4.
 */
@Test
public void testLearningRatePolyDecaySingleLayer() {
    final int iterations = 2;
    final double baseLr = 1e-2;
    final double power = 3;

    DenseLayer denseConf = new DenseLayer.Builder()
            .nIn(nIn)
            .nOut(nOut)
            .updater(org.deeplearning4j.nn.conf.Updater.SGD)
            .build();
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(baseLr)
            .learningRateDecayPolicy(LearningRatePolicy.Poly)
            .lrPolicyPower(power)
            .iterations(iterations)
            .layer(denseConf)
            .build();

    // Flat row vector sized to hold every parameter of the layer.
    INDArray flatParams = Nd4j.create(1, conf.getLayer().initializer().numParams(conf));
    Layer layer = conf.getLayer().instantiate(conf, null, 0, flatParams, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (int iter = 0; iter < iterations; iter++) {
        updater.update(layer, gradient, iter, 1);
        double expectedLr = calcPolyDecay(baseLr, iter, power, iterations);
        // Weight first, then bias - same order as the two separate assertions it replaces.
        for (String paramKey : new String[] { "W", "b" }) {
            assertEquals(expectedLr, layer.conf().getLearningRateByParam(paramKey), 1e-4);
        }
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)

Example 73 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestGradientNormalization, method testL2ClippingPerParamType:

/**
 * Checks {@code GradientNormalization.ClipL2PerParamType} on a single dense layer with a
 * no-op updater.
 *
 * <p>The weight gradient is scaled so that its L2 norm is below the threshold and must be
 * left untouched; the bias gradient is scaled so that its L2 norm exceeds the threshold and
 * must be rescaled by {@code threshold / biasL2}.
 *
 * <p>The bias gradient is created with {@code nOut} entries so that the fixture matches the
 * layer configuration (it was previously 1 x 10 for a layer with 20 outputs).
 */
@Test
public void testL2ClippingPerParamType() {
    Nd4j.getRandom().setSeed(12345);
    final int nIn = 10;
    final int nOut = 20;
    double threshold = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.ClipL2PerParamType)
                    .gradientNormalizationThreshold(threshold)
                    .build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    // Small weight values keep the weight norm under the threshold; large bias values push
    // the bias norm over it (both preconditions are asserted below).
    INDArray weightGrad = Nd4j.rand(nIn, nOut).muli(0.05);
    INDArray biasGrad = Nd4j.rand(1, nOut).muli(10);
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();
    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
    double weightL2 = weightGrad.norm2Number().doubleValue();
    double biasL2 = biasGrad.norm2Number().doubleValue();
    assertTrue(weightL2 < threshold);
    assertTrue(biasL2 > threshold);

    updater.update(layer, gradient, 0, 1);

    //weight norm2 < threshold -> no change
    assertEquals(weightGradCopy, weightGrad);
    //bias norm2 > threshold -> rescale
    assertNotEquals(biasGradCopy, biasGrad);
    double biasScalingFactor = threshold / biasL2;
    INDArray expectedBiasGrad = biasGradCopy.mul(biasScalingFactor);
    assertEquals(expectedBiasGrad, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)

Example 74 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestGradientNormalization, method testAbsValueClippingPerElement:

/**
 * Checks {@code GradientNormalization.ClipElementWiseAbsoluteValue} on a single dense layer
 * with a no-op updater.
 *
 * <p>Random gradients (each element is {@code rand * 10 - 5}) are passed through the updater;
 * afterwards every element must equal the original value clamped to
 * {@code [-threshold, threshold]}.
 *
 * <p>The bias gradient is created with {@code nOut} entries so that the fixture matches the
 * layer configuration (it was previously 1 x 10 for a layer with 20 outputs).
 */
@Test
public void testAbsValueClippingPerElement() {
    Nd4j.getRandom().setSeed(12345);
    final int nIn = 10;
    final int nOut = 20;
    double threshold = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(threshold)
                    .build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    INDArray weightGrad = Nd4j.rand(nIn, nOut).muli(10).subi(5);
    INDArray biasGrad = Nd4j.rand(1, nOut).muli(10).subi(5);
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();
    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);

    updater.update(layer, gradient, 0, 1);

    // Both arrays are expected to contain at least one element outside the threshold,
    // so the arrays handed to the updater must have changed.
    assertNotEquals(weightGradCopy, weightGrad);
    assertNotEquals(biasGradCopy, biasGrad);

    INDArray expectedWeightGrad = clipElementWise(weightGradCopy, threshold);
    INDArray expectedBiasGrad = clipElementWise(biasGradCopy, threshold);
    assertEquals(expectedWeightGrad, gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY));
    assertEquals(expectedBiasGrad, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}

/**
 * Returns a copy of {@code source} in which every element is clamped to
 * {@code [-threshold, threshold]}; {@code source} itself is not modified.
 */
private static INDArray clipElementWise(INDArray source, double threshold) {
    INDArray clipped = source.dup();
    for (int i = 0; i < clipped.length(); i++) {
        double d = clipped.getDouble(i);
        if (d > threshold) {
            clipped.putScalar(i, threshold);
        } else if (d < -threshold) {
            clipped.putScalar(i, -threshold);
        }
    }
    return clipped;
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)

Example 75 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

In class TestGradientNormalization, method testRenormalizatonPerLayer:

/**
 * Checks {@code GradientNormalization.RenormalizeL2PerLayer} on a single dense layer with a
 * no-op updater.
 *
 * <p>After the update, the weight and bias gradients must each equal the original gradient
 * divided by the L2 norm computed over both gradients together.
 *
 * <p>The bias gradient is created with {@code nOut} entries so that the fixture matches the
 * layer configuration (it was previously 1 x 10 for a layer with 20 outputs).
 */
@Test
public void testRenormalizatonPerLayer() {
    Nd4j.getRandom().setSeed(12345);
    final int nIn = 10;
    final int nOut = 20;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                    .build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    INDArray weightGrad = Nd4j.rand(nIn, nOut);
    INDArray biasGrad = Nd4j.rand(1, nOut);
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();
    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);

    updater.update(layer, gradient, 0, 1);

    assertNotEquals(weightGradCopy, weightGrad);
    assertNotEquals(biasGradCopy, biasGrad);

    // Layer-wide L2 norm: square root of the summed squares of weights and biases together.
    double sumSquaresWeight = weightGradCopy.mul(weightGradCopy).sumNumber().doubleValue();
    double sumSquaresBias = biasGradCopy.mul(biasGradCopy).sumNumber().doubleValue();
    double sumSquares = sumSquaresWeight + sumSquaresBias;
    double l2Layer = Math.sqrt(sumSquares);
    INDArray normWeightsExpected = weightGradCopy.div(l2Layer);
    INDArray normBiasExpected = biasGradCopy.div(l2Layer);

    // Sanity check: the normalised gradients must be finite and non-zero.
    double l2Weight = gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY).norm2Number().doubleValue();
    double l2Bias = gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY).norm2Number().doubleValue();
    assertTrue(!Double.isNaN(l2Weight) && l2Weight > 0.0);
    assertTrue(!Double.isNaN(l2Bias) && l2Bias > 0.0);

    assertEquals(normWeightsExpected, gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY));
    assertEquals(normBiasExpected, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient)105 INDArray (org.nd4j.linalg.api.ndarray.INDArray)100 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)72 Test (org.junit.Test)52 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)35 Pair (org.deeplearning4j.berkeley.Pair)28 Layer (org.deeplearning4j.nn.api.Layer)28 Updater (org.deeplearning4j.nn.api.Updater)25 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)24 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)21 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)9 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)8 IActivation (org.nd4j.linalg.activations.IActivation)6 HashMap (java.util.HashMap)5 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)5 ArrayList (java.util.ArrayList)4 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)4 DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException)4 IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)4 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)4