Search in sources :

Example 76 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestGradientNormalization method testRenormalizationPerParamType.

@Test
public void testRenormalizationPerParamType() {
    Nd4j.getRandom().setSeed(12345);
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(new DenseLayer.Builder().nIn(10).nOut(20).updater(org.deeplearning4j.nn.conf.Updater.NONE).gradientNormalization(GradientNormalization.RenormalizeL2PerParamType).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    INDArray weightGrad = Nd4j.rand(10, 20);
    INDArray biasGrad = Nd4j.rand(1, 10);
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();
    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
    updater.update(layer, gradient, 0, 1);
    INDArray normWeightsExpected = weightGradCopy.div(weightGradCopy.norm2Number());
    INDArray normBiasExpected = biasGradCopy.div(biasGradCopy.norm2Number());
    assertEquals(normWeightsExpected, gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY));
    assertEquals(normBiasExpected, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)

Example 77 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testRMSPropUpdater.

@Test
public void testRMSPropUpdater() {
    double lr = 0.01;
    double rmsDecay = 0.25;
    Map<String, INDArray> lastG = new HashMap<>();
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).rmsDecay(rmsDecay).layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    double epsilon = 1e-8;
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        key = entry.getKey();
        val = entry.getValue();
        INDArray lastGTmp = lastG.get(key);
        if (lastGTmp == null)
            lastGTmp = Nd4j.zeros(val.shape());
        lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
        gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(epsilon)));
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
        lastG.put(key, lastGTmp);
    }
    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)

Example 78 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testSGDUpdater.

@Test
public void testSGDUpdater() {
    double lr = 0.05;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)

Example 79 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testMultiLayerUpdater.

@Test
public void testMultiLayerUpdater() throws Exception {
    Nd4j.getRandom().setSeed(12345L);
    double lr = 0.03;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list().layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(org.deeplearning4j.nn.conf.Updater.SGD).build()).layer(1, new DenseLayer.Builder().nIn(5).nOut(6).updater(org.deeplearning4j.nn.conf.Updater.NONE).build()).layer(2, new DenseLayer.Builder().nIn(6).nOut(7).updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()).layer(3, new DenseLayer.Builder().nIn(7).nOut(8).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    Updater updater = UpdaterCreator.getUpdater(net);
    assertNotNull(updater);
    assertTrue(updater.getClass() == MultiLayerUpdater.class);
    Field f = MultiLayerUpdater.class.getDeclaredField("layerUpdaters");
    f.setAccessible(true);
    Updater[] updaters = (Updater[]) f.get(updater);
    assertNotNull(updaters);
    assertTrue(updaters.length == net.getnLayers());
    assertTrue(updaters[0] instanceof LayerUpdater);
    assertTrue(updaters[1] instanceof LayerUpdater);
    assertTrue(updaters[2] instanceof LayerUpdater);
    assertTrue(updaters[3] instanceof LayerUpdater);
    int count = 0;
    for (Updater u : updaters) {
        LayerUpdater lu = (LayerUpdater) u;
        for (GradientUpdater gu : lu.updaterForVariable.values()) {
            switch(count) {
                case 0:
                    assertTrue(gu instanceof Sgd);
                    break;
                case 1:
                    assertTrue(gu instanceof org.nd4j.linalg.learning.NoOpUpdater);
                    break;
                case 2:
                    assertTrue(gu instanceof AdaGrad);
                    break;
                case 3:
                    assertTrue(gu instanceof Nesterovs);
                    break;
                default:
                    throw new RuntimeException();
            }
        }
        count++;
    }
    LayerUpdater u = (LayerUpdater) updaters[0];
    Updater[] uArr = new Updater[4];
    uArr[0] = new LayerUpdater();
    uArr[1] = new LayerUpdater();
    uArr[2] = new LayerUpdater();
    INDArray updaterState = Nd4j.create(1, 6 * 7 + 7, 'f');
    uArr[2].setStateViewArray(net.getLayer(2), updaterState, true);
    uArr[3] = new LayerUpdater();
    //        updaterStateSize = uArr[3].stateSizeForLayer(net.getLayer(3));
    updaterState = Nd4j.create(1, 7 * 8 + 8, 'f');
    uArr[3].setStateViewArray(net.getLayer(3), updaterState, true);
    int[] nIns = { 4, 5, 6, 7 };
    int[] nOuts = { 5, 6, 7, 8 };
    for (int i = 0; i < 5; i++) {
        Gradient gradient = new DefaultGradient();
        Map<String, INDArray> expectedGradient = new LinkedHashMap<>();
        for (int j = 0; j < net.getnLayers(); j++) {
            //Generate test gradient:
            INDArray wGrad = Nd4j.rand(nIns[j], nOuts[j]);
            INDArray bGrad = Nd4j.rand(1, nOuts[j]);
            String wKey = j + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = j + "_" + DefaultParamInitializer.BIAS_KEY;
            gradient.setGradientFor(wKey, wGrad);
            gradient.setGradientFor(bKey, bGrad);
            //Also put copy of gradient through separate layer updaters to compare
            Gradient layerGradient = new DefaultGradient();
            layerGradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wGrad.dup());
            layerGradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, bGrad.dup());
            uArr[j].update(net.getLayer(j), layerGradient, i, 1);
            for (String s : layerGradient.gradientForVariable().keySet()) {
                expectedGradient.put(j + "_" + s, layerGradient.getGradientFor(s));
            }
        }
        updater.update(net, gradient, i, 1);
        assertEquals(gradient.gradientForVariable(), expectedGradient);
    }
}
Also used : org.nd4j.linalg.learning(org.nd4j.linalg.learning) Field(java.lang.reflect.Field) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Updater(org.deeplearning4j.nn.api.Updater) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)

Example 80 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testPretrain.

@Test
public void testPretrain() {
    double lr = 0.05;
    gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    Gradient gradientDup2 = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).seed(42).layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder().lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY).activation(Activation.IDENTITY).updater(org.deeplearning4j.nn.conf.Updater.SGD).nIn(nIn).nOut(nOut).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    boolean preTrain = true;
    conf.setPretrain(preTrain);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
    preTrain = false;
    conf.setPretrain(preTrain);
    gradient = gradientDup;
    params = Nd4j.create(1, numParams);
    layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup2.gradientForVariable().entrySet()) {
        val = entry.getValue();
        if (entry.getKey() != "vb")
            gradExpected = val.mul(lr);
        else
            gradExpected = val;
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient)105 INDArray (org.nd4j.linalg.api.ndarray.INDArray)100 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)72 Test (org.junit.Test)52 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)35 Pair (org.deeplearning4j.berkeley.Pair)28 Layer (org.deeplearning4j.nn.api.Layer)28 Updater (org.deeplearning4j.nn.api.Updater)25 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)24 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)21 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)9 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)8 IActivation (org.nd4j.linalg.activations.IActivation)6 HashMap (java.util.HashMap)5 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)5 ArrayList (java.util.ArrayList)4 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)4 DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException)4 IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)4 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)4