Search in sources :

Example 26 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestGradientNormalization method testRenormalizatonPerLayer.

/**
 * Checks {@code GradientNormalization.RenormalizeL2PerLayer} on a single dense layer.
 *
 * <p>The layer is configured with {@code Updater.NONE}. Random weight and bias gradients
 * are passed through the updater, and each resulting gradient is expected to equal the
 * original gradient divided by the L2 norm computed over the weight and bias gradients
 * together.
 */
@Test
public void testRenormalizatonPerLayer() {
    Nd4j.getRandom().setSeed(12345);

    final int nIn = 10;
    final int nOut = 20;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    // Gradient shapes follow the layer definition: weights are nIn x nOut and the
    // bias has one entry per output unit.
    INDArray weightGrad = Nd4j.rand(nIn, nOut);
    INDArray biasGrad = Nd4j.rand(1, nOut);
    // Copies are taken before the update so the expected values can be computed
    // from the untouched inputs.
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();

    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);

    updater.update(layer, gradient, 0, 1);

    // The arrays handed to the updater must differ from their pre-update copies.
    assertNotEquals(weightGradCopy, weightGrad);
    assertNotEquals(biasGradCopy, biasGrad);

    // Layer-wide L2 norm: sqrt of the summed squares of both gradients.
    double sumSquaresWeight = weightGradCopy.mul(weightGradCopy).sumNumber().doubleValue();
    double sumSquaresBias = biasGradCopy.mul(biasGradCopy).sumNumber().doubleValue();
    double l2Layer = Math.sqrt(sumSquaresWeight + sumSquaresBias);

    INDArray normWeightsExpected = weightGradCopy.div(l2Layer);
    INDArray normBiasExpected = biasGradCopy.div(l2Layer);

    INDArray weightGradActual = gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY);
    INDArray biasGradActual = gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY);

    // Sanity check: the normalized gradients have a valid, strictly positive norm.
    double l2Weight = weightGradActual.norm2Number().doubleValue();
    double l2Bias = biasGradActual.norm2Number().doubleValue();
    assertTrue(!Double.isNaN(l2Weight) && l2Weight > 0.0);
    assertTrue(!Double.isNaN(l2Bias) && l2Bias > 0.0);

    assertEquals(normWeightsExpected, weightGradActual);
    assertEquals(normBiasExpected, biasGradActual);
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) Test(org.junit.Test)

Example 27 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestGradientNormalization method testRenormalizationPerParamType.

/**
 * Checks {@code GradientNormalization.RenormalizeL2PerParamType} on a single dense layer.
 *
 * <p>The layer is configured with {@code Updater.NONE}. After the update, the weight
 * gradient is expected to equal the original weight gradient divided by its own L2 norm,
 * and likewise for the bias gradient.
 */
@Test
public void testRenormalizationPerParamType() {
    Nd4j.getRandom().setSeed(12345);

    final int nIn = 10;
    final int nOut = 20;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.RenormalizeL2PerParamType)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    // Gradient shapes follow the layer definition: weights are nIn x nOut and the
    // bias has one entry per output unit.
    INDArray weightGrad = Nd4j.rand(nIn, nOut);
    INDArray biasGrad = Nd4j.rand(1, nOut);
    // Copies are taken before the update so the expected values can be computed
    // from the untouched inputs.
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();

    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);

    updater.update(layer, gradient, 0, 1);

    // Each parameter type is scaled by the L2 norm of its own gradient.
    INDArray normWeightsExpected = weightGradCopy.div(weightGradCopy.norm2Number());
    INDArray normBiasExpected = biasGradCopy.div(biasGradCopy.norm2Number());

    assertEquals(normWeightsExpected, gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY));
    assertEquals(normBiasExpected, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) Test(org.junit.Test)

Example 28 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testRMSPropUpdater.

/**
 * Runs the RMSProp updater once on the shared test gradient and compares every
 * resulting gradient with a value computed by hand from a copy of the inputs:
 * {@code lr * g / sqrt(cache + epsilon)}, where {@code cache} starts at zero and is
 * updated as {@code cache * rmsDecay + g^2 * (1 - rmsDecay)}.
 *
 * <p>Also checks that the configured RMS decay is readable from the layer configuration.
 */
@Test
public void testRMSPropUpdater() {
    final double lr = 0.01;
    final double rmsDecay = 0.25;
    final double epsilon = 1e-8;
    Map<String, INDArray> lastG = new HashMap<>();

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .rmsDecay(rmsDecay)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    // Give the updater a state array of the size it reports for this layer.
    INDArray updaterState = Nd4j.create(1, updater.stateSizeForLayer(layer));
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    // Duplicates of the input gradients, used to compute the expected result.
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        key = entry.getKey();
        val = entry.getValue();

        // Start from a zero cache the first time a parameter key is seen.
        INDArray cache = lastG.containsKey(key) ? lastG.get(key) : Nd4j.zeros(val.shape());

        INDArray weightedSquare = val.mul(val).muli(1 - rmsDecay);
        cache.muli(rmsDecay).addi(weightedSquare);

        INDArray denominator = Transforms.sqrt(cache.add(epsilon));
        gradExpected = val.mul(lr).div(denominator);

        assertEquals(gradExpected, gradient.getGradientFor(key));
        lastG.put(key, cache);
    }

    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)

Example 29 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testSGDUpdater.

/**
 * Runs the SGD updater once on the shared test gradient and checks that every
 * resulting gradient equals the corresponding input gradient multiplied by the
 * learning rate.
 *
 * <p>Also checks that the configured learning rate is readable from the layer configuration.
 */
@Test
public void testSGDUpdater() {
    final double lr = 0.05;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    updater.update(layer, gradient, -1, 1);

    // Duplicates of the input gradients, used to compute the expected result.
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        String paramKey = entry.getKey();
        val = entry.getValue();
        gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(paramKey));
    }

    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)

Example 30 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testMultiLayerUpdater.

/**
 * Checks the updater created for a four-layer {@code MultiLayerNetwork} whose layers use
 * SGD, NONE, AdaGrad and Nesterovs respectively.
 *
 * <p>The test verifies that:
 * <ul>
 *   <li>the network updater is a {@code MultiLayerUpdater} holding one {@code LayerUpdater}
 *       per layer (read reflectively from its {@code layerUpdaters} field);</li>
 *   <li>each layer updater uses the gradient updater type configured for that layer;</li>
 *   <li>over five iterations, updating the whole network gives the same gradients as
 *       updating each layer with a separate, freshly created {@code LayerUpdater}.</li>
 * </ul>
 *
 * @throws Exception if reflective access to {@code layerUpdaters} fails
 */
@Test
public void testMultiLayerUpdater() throws Exception {
    Nd4j.getRandom().setSeed(12345L);
    double lr = 0.03;
    int[] nIns = { 4, 5, 6, 7 };
    int[] nOuts = { 5, 6, 7, 8 };

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .momentum(0.6)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .layer(1, new DenseLayer.Builder().nIn(5).nOut(6).updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
            .layer(2, new DenseLayer.Builder().nIn(6).nOut(7).updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
            .layer(3, new DenseLayer.Builder().nIn(7).nOut(8).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    Updater updater = UpdaterCreator.getUpdater(net);
    assertNotNull(updater);
    assertEquals(MultiLayerUpdater.class, updater.getClass());

    // The per-layer updaters are only reachable through the private field.
    Field f = MultiLayerUpdater.class.getDeclaredField("layerUpdaters");
    f.setAccessible(true);
    Updater[] updaters = (Updater[]) f.get(updater);
    assertNotNull(updaters);
    assertEquals(net.getnLayers(), updaters.length);
    assertTrue(updaters[0] instanceof LayerUpdater);
    assertTrue(updaters[1] instanceof LayerUpdater);
    assertTrue(updaters[2] instanceof LayerUpdater);
    assertTrue(updaters[3] instanceof LayerUpdater);

    // Every gradient updater inside layer i must match the type configured for layer i.
    int count = 0;
    for (Updater layerUpdater : updaters) {
        LayerUpdater lu = (LayerUpdater) layerUpdater;
        for (GradientUpdater gu : lu.updaterForVariable.values()) {
            switch (count) {
                case 0:
                    assertTrue(gu instanceof Sgd);
                    break;
                case 1:
                    assertTrue(gu instanceof org.nd4j.linalg.learning.NoOpUpdater);
                    break;
                case 2:
                    assertTrue(gu instanceof AdaGrad);
                    break;
                case 3:
                    assertTrue(gu instanceof Nesterovs);
                    break;
                default:
                    throw new RuntimeException();
            }
        }
        count++;
    }

    // Independent per-layer updaters used as the reference implementation.
    Updater[] uArr = new Updater[4];
    for (int k = 0; k < uArr.length; k++) {
        uArr[k] = new LayerUpdater();
    }
    // Only the AdaGrad (2) and Nesterovs (3) reference updaters are given a state view,
    // sized at one value per weight and bias parameter of the layer.
    // NOTE(review): presumably SGD and NONE keep no state - confirm.
    INDArray updaterState = Nd4j.create(1, nIns[2] * nOuts[2] + nOuts[2], 'f');
    uArr[2].setStateViewArray(net.getLayer(2), updaterState, true);
    updaterState = Nd4j.create(1, nIns[3] * nOuts[3] + nOuts[3], 'f');
    uArr[3].setStateViewArray(net.getLayer(3), updaterState, true);

    for (int i = 0; i < 5; i++) {
        Gradient netGradient = new DefaultGradient();
        Map<String, INDArray> expectedGradient = new LinkedHashMap<>();
        for (int j = 0; j < net.getnLayers(); j++) {
            // Generate test gradient:
            INDArray wGrad = Nd4j.rand(nIns[j], nOuts[j]);
            INDArray bGrad = Nd4j.rand(1, nOuts[j]);
            // Network-level gradients are keyed as "<layerIndex>_<paramKey>".
            String wKey = j + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = j + "_" + DefaultParamInitializer.BIAS_KEY;
            netGradient.setGradientFor(wKey, wGrad);
            netGradient.setGradientFor(bKey, bGrad);

            // Also put a copy of the gradient through the separate layer updater to compare.
            Gradient layerGradient = new DefaultGradient();
            layerGradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wGrad.dup());
            layerGradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, bGrad.dup());
            uArr[j].update(net.getLayer(j), layerGradient, i, 1);
            for (String s : layerGradient.gradientForVariable().keySet()) {
                expectedGradient.put(j + "_" + s, layerGradient.getGradientFor(s));
            }
        }
        updater.update(net, netGradient, i, 1);
        assertEquals(expectedGradient, netGradient.gradientForVariable());
    }
}
Also used : org.nd4j.linalg.learning(org.nd4j.linalg.learning) Field(java.lang.reflect.Field) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Updater(org.deeplearning4j.nn.api.Updater) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)

Aggregations

Updater (org.deeplearning4j.nn.api.Updater)37 Test (org.junit.Test)28 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)27 INDArray (org.nd4j.linalg.api.ndarray.INDArray)27 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)25 Gradient (org.deeplearning4j.nn.gradient.Gradient)25 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)23 Layer (org.deeplearning4j.nn.api.Layer)21 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)18 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)9 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)8 ComputationGraphUpdater (org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater)5 HashMap (java.util.HashMap)4 Solver (org.deeplearning4j.optimize.Solver)4 ArrayList (java.util.ArrayList)2 Field (java.lang.reflect.Field)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 ZipEntry (java.util.zip.ZipEntry)1 ZipFile (java.util.zip.ZipFile)1 Persistable (org.deeplearning4j.api.storage.Persistable)1