
Example 41 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

From the class TestUpdaters: method testRMSPropUpdater.

@Test
public void testRMSPropUpdater() {
    double lr = 0.01;
    double rmsDecay = 0.25;
    Map<String, INDArray> lastG = new HashMap<>();
    //nIn, nOut, gradient, weightGradient, biasGradient, key, val and gradExpected are fields of TestUpdaters, initialized in the test fixture
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).rmsDecay(rmsDecay)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    double epsilon = 1e-8;
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        key = entry.getKey();
        val = entry.getValue();
        INDArray lastGTmp = lastG.get(key);
        if (lastGTmp == null)
            lastGTmp = Nd4j.zeros(val.shape());
        lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
        gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(epsilon)));
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
        lastG.put(key, lastGTmp);
    }
    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
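
Written out, the update this test reproduces by hand is the standard RMSProp rule. With g the raw gradient, η the learning rate, ρ the rmsDecay, and r the running average of squared gradients (lastG in the test):

\[ r \leftarrow \rho\, r + (1 - \rho)\, g^2, \qquad \Delta\theta = \frac{\eta\, g}{\sqrt{r + \epsilon}} \]

The ε = 1e-8 term guards against division by zero while the accumulated squared gradient is still near zero.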

Example 42 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

From the class TestUpdaters: method testSGDUpdater.

@Test
public void testSGDUpdater() {
    double lr = 0.05;
    //nIn, nOut, gradient, weightGradient, biasGradient, val and gradExpected are fields of TestUpdaters, initialized in the test fixture
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
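
For comparison, the expected update in the SGD case is stateless: the updater simply scales the raw gradient by the learning rate,

\[ \Delta\theta = \eta\, g \]

which is exactly the val.mul(lr) the assertion loop checks against.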

Example 43 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

From the class TestUpdaters: method testMultiLayerUpdater.

@Test
public void testMultiLayerUpdater() throws Exception {
    Nd4j.getRandom().setSeed(12345L);
    double lr = 0.03;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6).list()
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .layer(1, new DenseLayer.Builder().nIn(5).nOut(6).updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
            .layer(2, new DenseLayer.Builder().nIn(6).nOut(7).updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
            .layer(3, new DenseLayer.Builder().nIn(7).nOut(8).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    Updater updater = UpdaterCreator.getUpdater(net);
    assertNotNull(updater);
    assertEquals(MultiLayerUpdater.class, updater.getClass());
    Field f = MultiLayerUpdater.class.getDeclaredField("layerUpdaters");
    f.setAccessible(true);
    Updater[] updaters = (Updater[]) f.get(updater);
    assertNotNull(updaters);
    assertEquals(net.getnLayers(), updaters.length);
    assertTrue(updaters[0] instanceof LayerUpdater);
    assertTrue(updaters[1] instanceof LayerUpdater);
    assertTrue(updaters[2] instanceof LayerUpdater);
    assertTrue(updaters[3] instanceof LayerUpdater);
    int count = 0;
    for (Updater u : updaters) {
        LayerUpdater lu = (LayerUpdater) u;
        for (GradientUpdater gu : lu.updaterForVariable.values()) {
            switch(count) {
                case 0:
                    assertTrue(gu instanceof Sgd);
                    break;
                case 1:
                    assertTrue(gu instanceof org.nd4j.linalg.learning.NoOpUpdater);
                    break;
                case 2:
                    assertTrue(gu instanceof AdaGrad);
                    break;
                case 3:
                    assertTrue(gu instanceof Nesterovs);
                    break;
                default:
                    throw new RuntimeException();
            }
        }
        count++;
    }
    //Build standalone per-layer updaters to compare against the MultiLayerUpdater
    Updater[] uArr = new Updater[4];
    //SGD (layer 0) and NONE (layer 1) keep no updater state, so no state view arrays are needed
    uArr[0] = new LayerUpdater();
    uArr[1] = new LayerUpdater();
    //AdaGrad keeps one accumulator per parameter: 6*7 weights + 7 biases
    uArr[2] = new LayerUpdater();
    INDArray updaterState = Nd4j.create(1, 6 * 7 + 7, 'f');
    uArr[2].setStateViewArray(net.getLayer(2), updaterState, true);
    //Nesterov momentum likewise keeps one velocity value per parameter: 7*8 weights + 8 biases
    uArr[3] = new LayerUpdater();
    updaterState = Nd4j.create(1, 7 * 8 + 8, 'f');
    uArr[3].setStateViewArray(net.getLayer(3), updaterState, true);
    int[] nIns = { 4, 5, 6, 7 };
    int[] nOuts = { 5, 6, 7, 8 };
    for (int i = 0; i < 5; i++) {
        Gradient gradient = new DefaultGradient();
        Map<String, INDArray> expectedGradient = new LinkedHashMap<>();
        for (int j = 0; j < net.getnLayers(); j++) {
            //Generate test gradient:
            INDArray wGrad = Nd4j.rand(nIns[j], nOuts[j]);
            INDArray bGrad = Nd4j.rand(1, nOuts[j]);
            String wKey = j + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = j + "_" + DefaultParamInitializer.BIAS_KEY;
            gradient.setGradientFor(wKey, wGrad);
            gradient.setGradientFor(bKey, bGrad);
            //Also put copy of gradient through separate layer updaters to compare
            Gradient layerGradient = new DefaultGradient();
            layerGradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wGrad.dup());
            layerGradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, bGrad.dup());
            uArr[j].update(net.getLayer(j), layerGradient, i, 1);
            for (String s : layerGradient.gradientForVariable().keySet()) {
                expectedGradient.put(j + "_" + s, layerGradient.getGradientFor(s));
            }
        }
        updater.update(net, gradient, i, 1);
        assertEquals(gradient.gradientForVariable(), expectedGradient);
    }
}
Also used : org.nd4j.linalg.learning(org.nd4j.linalg.learning) Field(java.lang.reflect.Field) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Updater(org.deeplearning4j.nn.api.Updater) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)
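
A note on the hand-sized state arrays in this test: SGD and NONE keep no updater state, while AdaGrad and Nesterov momentum each keep one accumulator value per parameter, so a dense layer's updater state length equals its parameter count,

\[ \text{stateSize} = n_{in} \cdot n_{out} + n_{out} \]

giving 6 * 7 + 7 = 49 for layer 2 and 7 * 8 + 8 = 64 for layer 3, matching the two Nd4j.create calls above.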

Example 44 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

From the class TestUpdaters: method testPretrain.

@Test
public void testPretrain() {
    double lr = 0.05;
    //gradient, weightGradient, biasGradient, vbiasGradient, val and gradExpected are fields of TestUpdaters, initialized in the test fixture
    gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    Gradient gradientDup2 = new DefaultGradient();
    gradientDup2.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup2.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup2.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).seed(42)
            .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
                    .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY).activation(Activation.IDENTITY)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).nIn(nIn).nOut(nOut).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    boolean preTrain = true;
    conf.setPretrain(preTrain);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
    preTrain = false;
    conf.setPretrain(preTrain);
    gradient = gradientDup;
    params = Nd4j.create(1, numParams);
    layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup2.gradientForVariable().entrySet()) {
        val = entry.getValue();
        //With pretrain disabled, the visible bias ("vb") is not updated
        if (!PretrainParamInitializer.VISIBLE_BIAS_KEY.equals(entry.getKey()))
            gradExpected = val.mul(lr);
        else
            gradExpected = val;
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
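
The two assertion loops encode how the visible bias is expected to be treated: during pretraining every parameter (weights W, hidden bias b, visible bias vb) receives the plain SGD update, while once pretrain is switched off the visible bias is excluded and its gradient passes through unscaled:

\[ \Delta\theta = \begin{cases} \eta\, g & \text{pretrain, or } \theta \in \{W, b\} \\ g & \text{backprop and } \theta = vb \end{cases} \]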

Example 45 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

From the class TestUpdaters: method testAdaDeltaUpdate.

@Test
public void testAdaDeltaUpdate() {
    //key, val, gradExpected, gradient, weightGradient and biasGradient are fields of TestUpdaters, initialized in the test fixture
    INDArray dxSquared;
    Map<String, INDArray> msg = new HashMap<>();
    Map<String, INDArray> msdx = new HashMap<>();
    double rho = 0.85;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().rho(rho)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).epsilon(Nd4j.EPS_THRESHOLD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (int i = 0; i < 2; i++) {
        updater.update(layer, gradient, i, 1);
        for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
            key = entry.getKey();
            val = entry.getValue();
            INDArray msgTmp = msg.get(key);
            INDArray msdxTmp = msdx.get(key);
            if (msgTmp == null) {
                msgTmp = Nd4j.zeros(val.shape());
                msdxTmp = Nd4j.zeros(val.shape());
            }
            msgTmp.muli(rho);
            msgTmp.addi(val.mul(val).muli(1 - rho));
            gradExpected = Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD)).divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD))).muli(val);
            gradientDup.setGradientFor(key, gradExpected);
            assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
            msdxTmp.muli(rho);
            dxSquared = gradExpected.mul(gradExpected);
            msdxTmp.addi(dxSquared.muli(1 - rho));
            msg.put(key, msgTmp);
            msdx.put(key, msdxTmp);
        }
        assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
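
The expected values follow the AdaDelta recurrences with decay ρ, where msg tracks the mean squared gradient and msdx the mean squared update:

\[ E[g^2]_t = \rho\, E[g^2]_{t-1} + (1 - \rho)\, g_t^2 \]
\[ \Delta x_t = \frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\; g_t \]
\[ E[\Delta x^2]_t = \rho\, E[\Delta x^2]_{t-1} + (1 - \rho)\, (\Delta x_t)^2 \]

with ε = Nd4j.EPS_THRESHOLD, the same epsilon the layer was configured with.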

Aggregations

DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 59 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 58 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 56 usages
Test (org.junit.Test): 26 usages
Pair (org.deeplearning4j.berkeley.Pair): 23 usages
Updater (org.deeplearning4j.nn.api.Updater): 23 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 22 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 22 usages
Layer (org.deeplearning4j.nn.api.Layer): 20 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 16 usages
HashMap (java.util.HashMap): 5 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 4 usages
Allocator (org.nd4j.jita.allocator.Allocator): 4 usages
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 4 usages
IActivation (org.nd4j.linalg.activations.IActivation): 4 usages
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 4 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 4 usages
Map (java.util.Map): 3 usages
DoublePointer (org.bytedeco.javacpp.DoublePointer): 3 usages
FloatPointer (org.bytedeco.javacpp.FloatPointer): 3 usages