Example 21 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From class TestUpdaters, method testAdaGradUpdater.

@Test
public void testAdaGradUpdater() {
    double lr = 1e-2;
    // nIn, nOut, gradient, weightGradient, biasGradient and epsilon are fields of TestUpdaters
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    // Apply one AdaGrad update in place, then recompute the expected result by hand
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        // AdaGrad scaling: lr / sqrt(g^2 + epsilon) * g (state starts at zero, so history == g^2)
        INDArray gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
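
The expected value in this test reconstructs AdaGrad's per-parameter scaling. Because the updater state starts at zero, the accumulated squared-gradient history after one step is just g squared, so the applied update is lr / sqrt(g^2 + epsilon) * g. A minimal plain-double sketch of that arithmetic, where the epsilon and gradient values are illustrative assumptions rather than the test's actual field values:

public class AdaGradStepSketch {
    public static void main(String[] args) {
        double lr = 1e-2;
        double epsilon = 1e-6;  // assumed smoothing constant; the test reads it from a class field
        double g = 0.25;        // illustrative raw gradient value
        double history = g * g; // zero initial state, so history == g^2 after one update
        double applied = lr / Math.sqrt(history + epsilon) * g;
        System.out.println("AdaGrad step: " + applied); // matches sqrt(g^2 + eps).rdiv(lr).mul(g)
    }
}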

Example 22 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From class TestUpdaters, method testNestorovsUpdater.

@Test
public void testNestorovsUpdater() {
    double lr = 1e-2;
    double mu = 0.6;
    INDArray v, vPrev;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        v = Nd4j.zeros(val.shape());
        vPrev = v;
        // Nesterov momentum with zero initial velocity: v = mu*vPrev - lr*g
        v = vPrev.mul(mu).subi(val.mul(lr));
        // Expected applied update: mu*vPrev + (-mu - 1)*v, i.e. (1 + mu)*lr*g on the first step
        INDArray gradExpected = vPrev.muli(mu).addi(v.mul(-mu - 1));
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)
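
With zero initial velocity the loop collapses: v = mu * 0 - lr * g, and the expected applied update mu * vPrev + (-mu - 1) * v equals (1 + mu) * lr * g. A scalar sketch of that reconstruction, with illustrative values:

public class NesterovStepSketch {
    public static void main(String[] args) {
        double lr = 1e-2, mu = 0.6;
        double g = 0.25;    // illustrative raw gradient value
        double vPrev = 0.0; // velocity starts at zero, as in the test
        double v = mu * vPrev - lr * g;              // new velocity
        double applied = mu * vPrev + (-mu - 1) * v; // == (1 + mu) * lr * g here, i.e. 0.004
        System.out.println("Nesterov step: " + applied);
    }
}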

Example 23 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From class TestUpdaters, method testAdamUpdater.

@Test
public void testAdamUpdater() {
    INDArray m, v;
    double lr = 0.01;
    int iteration = 0;
    double beta1 = 0.8;
    double beta2 = 0.888;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).iterations(iteration)
                    .adamMeanDecay(beta1).adamVarDecay(beta2)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, iteration, 1);
    // Bias-corrected step size for the first update (t = iteration + 1)
    double beta1t = FastMath.pow(beta1, iteration + 1);
    double beta2t = FastMath.pow(beta2, iteration + 1);
    double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
    if (Double.isNaN(alphat) || alphat == 0.0)
        alphat = epsilon;
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        // Moment estimates from zero initial state: m = (1-beta1)*g, v = (1-beta2)*g^2
        m = Nd4j.zeros(val.shape());
        v = Nd4j.zeros(val.shape());
        m.muli(beta1).addi(val.mul(1.0 - beta1));
        v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
        INDArray gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon));
        if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) {
            // Print both sides on mismatch so the assertion failure is easier to debug
            System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
            System.out.println(Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
        }
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
    assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)
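
At iteration 0 the moment estimates start from zero, so after one update m = (1 - beta1) * g and v = (1 - beta2) * g^2, and the bias-corrected step size is alphat = lr * sqrt(1 - beta2^t) / (1 - beta1^t) with t = 1. A scalar sketch of the expected first step; the epsilon and gradient values below are assumptions, since the test takes them from class fields:

public class AdamStepSketch {
    public static void main(String[] args) {
        double lr = 0.01, beta1 = 0.8, beta2 = 0.888;
        double epsilon = 1e-8; // assumed value; the test reads epsilon from a class field
        double g = 0.25;       // illustrative raw gradient value
        int t = 1;             // first update (iteration 0 in the test)
        double m = (1 - beta1) * g;     // first moment from zero initial state
        double v = (1 - beta2) * g * g; // second moment from zero initial state
        double alphat = lr * Math.sqrt(1 - Math.pow(beta2, t)) / (1 - Math.pow(beta1, t));
        double applied = alphat * m / (Math.sqrt(v) + epsilon);
        System.out.println("Adam step: " + applied); // roughly lr on the first step
    }
}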

Example 24 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From class TestDecayPolicies, method testLearningRateTorchStepDecaySingleLayer.

@Test
public void testLearningRateTorchStepDecaySingleLayer() {
    int iterations = 20;
    double lr = 1;
    double decayRate = .5;
    double steps = 10;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.TorchStep).lrPolicyDecayRate(decayRate)
                    .lrPolicySteps(steps).iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    double expectedLr = lr;
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        // Track the rate the TorchStep policy should produce for this iteration
        if (i > 1 && steps % i == 0)
            expectedLr = calcTorchStepDecay(expectedLr, decayRate);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)
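
The helper calcTorchStepDecay is defined elsewhere in the test class; Torch-style step decay multiplies the current rate by the decay factor each time the policy fires. A sketch of the expected-rate bookkeeping under that assumption, copying the test's trigger condition verbatim (including its steps % i form):

public class TorchStepDecaySketch {
    public static void main(String[] args) {
        double expectedLr = 1.0, decayRate = 0.5;
        double steps = 10;
        for (int i = 0; i < 20; i++) {
            if (i > 1 && steps % i == 0)
                expectedLr *= decayRate; // assumed body of calcTorchStepDecay
            System.out.println("iteration " + i + ": lr = " + expectedLr);
        }
    }
}

With steps = 10, that condition fires at i = 2, 5, and 10, halving the expected rate each time.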

Example 25 with Gradient

Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

From class TestDecayPolicies, method testLearningRateScheduleSingleLayer.

@Test
public void testLearningRateScheduleSingleLayer() {
    Map<Integer, Double> learningRateAfter = new HashMap<>();
    learningRateAfter.put(1, 0.2);
    int iterations = 2;
    // updaters is a field of TestDecayPolicies listing the updater types under test
    for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
        double lr = 1e-2;
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                        .learningRateSchedule(learningRateAfter)
                        .learningRateDecayPolicy(LearningRatePolicy.Schedule).iterations(iterations)
                        .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(updaterFunc).build())
                        .build();
        int numParams = conf.getLayer().initializer().numParams(conf);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);
        int stateSize = updater.stateSizeForLayer(layer);
        if (stateSize > 0)
            updater.setStateViewArray(layer, Nd4j.create(1, stateSize), true);
        Gradient gradientActual = new DefaultGradient();
        gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
        gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
        Gradient gradientExpected = new DefaultGradient();
        gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
        gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
        for (int i = 0; i < 2; i++) {
            updater.update(layer, gradientActual, i, 1);
            // Delegate the expected-value computation to the per-updater helper methods
            if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
                lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
                lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
                lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
                lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            assertEquals(lr, layer.conf().getLearningRateByParam("W"), 1e-4);
        }
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) HashMap(java.util.HashMap) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
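
A Schedule policy replaces the learning rate with the mapped value once the given iteration is reached; here the map sends iteration 1 to 0.2. A sketch of that lookup; the containsKey check is an assumption about how the schedule is consulted, not DL4J's exact code path:

import java.util.HashMap;
import java.util.Map;

public class ScheduleSketch {
    public static void main(String[] args) {
        Map<Integer, Double> learningRateAfter = new HashMap<>();
        learningRateAfter.put(1, 0.2);
        double lr = 1e-2;
        for (int i = 0; i < 2; i++) {
            if (learningRateAfter.containsKey(i))
                lr = learningRateAfter.get(i); // switch to the scheduled rate
            System.out.println("iteration " + i + ": lr = " + lr);
        }
    }
}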

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient): 105 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 100 usages
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 72 usages
Test (org.junit.Test): 52 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 35 usages
Pair (org.deeplearning4j.berkeley.Pair): 28 usages
Layer (org.deeplearning4j.nn.api.Layer): 28 usages
Updater (org.deeplearning4j.nn.api.Updater): 25 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 24 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 21 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 9 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 8 usages
IActivation (org.nd4j.linalg.activations.IActivation): 6 usages
HashMap (java.util.HashMap): 5 usages
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 5 usages
ArrayList (java.util.ArrayList): 4 usages
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 4 usages
DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException): 4 usages
IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer): 4 usages
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 4 usages