Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
From the class TestDecayPolicies, method testLearningRateInverseDecaySingleLayer.
@Test
public void testLearningRateInverseDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double power = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .learningRateDecayPolicy(LearningRatePolicy.Inverse)
            .lrPolicyDecayRate(decayRate)
            .lrPolicyPower(power)
            .iterations(iterations)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcInverseDecay(lr, decayRate, i, power);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
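The calcInverseDecay helper is defined elsewhere in TestDecayPolicies and is not shown in this snippet. A minimal sketch of what it computes, assuming the Inverse policy follows the usual Caffe-style convention lr / (1 + decayRate * iteration)^power:

// Sketch of the calcInverseDecay helper (an assumption: matches DL4J's
// Inverse LR policy, lr / (1 + decayRate * iteration)^power).
private static double calcInverseDecay(double lr, double decayRate, int iteration, double power) {
    return lr / Math.pow(1 + decayRate * iteration, power);
}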
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
From the class TestDecayPolicies, method testLearningRateScheduleMLN.
@Test
public void testLearningRateScheduleMLN() {
    Map<Integer, Double> learningRateAfter = new HashMap<>();
    learningRateAfter.put(1, 0.2);
    int iterations = 2;
    int[] nIns = { 4, 2 };
    int[] nOuts = { 2, 3 };
    for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
        double lr = 1e-2;
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Schedule)
                .learningRateSchedule(learningRateAfter)
                .iterations(iterations)
                .updater(updaterFunc)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build())
                .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).build())
                .backprop(true).pretrain(false)
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        Updater updater = UpdaterCreator.getUpdater(net);
        String wKey, bKey;
        for (int i = 0; i < 2; i++) {
            Gradient gradientActual = new DefaultGradient();
            Gradient gradientExpected = new DefaultGradient();
            for (int k = 0; k < net.getnLayers(); k++) {
                wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
                gradientActual.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
                gradientExpected.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
                bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
                gradientActual.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
                gradientExpected.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
            }
            updater.update(net, gradientActual, i, 1);
            if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
                lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
                lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
                lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
                lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            // The layer's LR should equal the initial value at iteration 0 and the
            // scheduled value (0.2) from iteration 1 onward; check with a tight
            // tolerance rather than a delta the size of the LR itself.
            assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), 1e-4);
        }
    }
}
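The per-updater helpers (testSGDComputation, testAdaGradComputation, and so on) are private methods of TestDecayPolicies that this snippet does not show. A hypothetical sketch of the SGD variant, assuming it resolves the scheduled LR for iteration i and checks that the applied update equals lr times the raw gradient:

// Hypothetical sketch of testSGDComputation (the real helper is not shown
// here): look up the LR in effect at iteration i, then verify the updater
// produced the plain SGD update lr * gradient for every parameter.
private double testSGDComputation(Gradient actual, Gradient expected, double lr,
                                  Map<Integer, Double> schedule, int i) {
    if (schedule.containsKey(i))
        lr = schedule.get(i);
    for (Map.Entry<String, INDArray> entry : expected.gradientForVariable().entrySet()) {
        INDArray expectedUpdate = entry.getValue().mul(lr);
        assertEquals(expectedUpdate, actual.getGradientFor(entry.getKey()));
    }
    return lr;
}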
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
From the class TestDecayPolicies, method testLearningRatePolyDecaySingleLayer.
@Test
public void testLearningRatePolyDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double power = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .learningRateDecayPolicy(LearningRatePolicy.Poly)
            .lrPolicyPower(power)
            .iterations(iterations)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcPolyDecay(lr, i, power, iterations);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
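As with calcInverseDecay, the calcPolyDecay helper lives elsewhere in the test class. A minimal sketch, assuming the Poly policy follows the usual convention lr * (1 - iteration / maxIterations)^power:

// Sketch of the calcPolyDecay helper (an assumption: matches DL4J's Poly
// LR policy, lr * (1 - iteration / maxIterations)^power). maxIterations is
// a double to avoid integer division.
private static double calcPolyDecay(double lr, int iteration, double power, double maxIterations) {
    return lr * Math.pow(1.0 - iteration / maxIterations, power);
}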
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
From the class TestGradientNormalization, method testL2ClippingPerParamType.
@Test
public void testL2ClippingPerParamType() {
    Nd4j.getRandom().setSeed(12345);
    double threshold = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder().nIn(10).nOut(20)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.ClipL2PerParamType)
                    .gradientNormalizationThreshold(threshold).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    // Small weight gradient (norm2 below threshold), large bias gradient (above).
    // The bias of a 10 -> 20 dense layer has shape [1, 20].
    INDArray weightGrad = Nd4j.rand(10, 20).muli(0.05);
    INDArray biasGrad = Nd4j.rand(1, 20).muli(10);
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();
    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
    double weightL2 = weightGrad.norm2Number().doubleValue();
    double biasL2 = biasGrad.norm2Number().doubleValue();
    assertTrue(weightL2 < threshold);
    assertTrue(biasL2 > threshold);
    updater.update(layer, gradient, 0, 1);
    //weight norm2 < threshold -> no change
    assertEquals(weightGradCopy, weightGrad);
    //bias norm2 > threshold -> rescale
    assertNotEquals(biasGradCopy, biasGrad);
    double biasScalingFactor = threshold / biasL2;
    INDArray expectedBiasGrad = biasGradCopy.mul(biasScalingFactor);
    assertEquals(expectedBiasGrad, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}
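Read off the assertions, the behavior under test amounts to the following sketch: each parameter type's gradient is rescaled independently, and only when its own L2 norm exceeds the threshold (this is an illustration of what the test checks, not DL4J's actual implementation).

// Sketch of ClipL2PerParamType as the assertions above characterize it.
static void clipL2PerParamType(Gradient gradient, double threshold) {
    for (Map.Entry<String, INDArray> entry : gradient.gradientForVariable().entrySet()) {
        double l2 = entry.getValue().norm2Number().doubleValue();
        if (l2 > threshold)
            entry.getValue().muli(threshold / l2);  // rescale so norm2 == threshold
    }
}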
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
From the class TestGradientNormalization, method testAbsValueClippingPerElement.
@Test
public void testAbsValueClippingPerElement() {
    Nd4j.getRandom().setSeed(12345);
    double threshold = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder().nIn(10).nOut(20)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(threshold).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    // Gradients uniform in [-5, 5], so some elements exceed +/- threshold.
    // The bias of a 10 -> 20 dense layer has shape [1, 20].
    INDArray weightGrad = Nd4j.rand(10, 20).muli(10).subi(5);
    INDArray biasGrad = Nd4j.rand(1, 20).muli(10).subi(5);
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();
    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);
    updater.update(layer, gradient, 0, 1);
    assertNotEquals(weightGradCopy, weightGrad);
    assertNotEquals(biasGradCopy, biasGrad);
    INDArray expectedWeightGrad = weightGradCopy.dup();
    for (int i = 0; i < expectedWeightGrad.length(); i++) {
        double d = expectedWeightGrad.getDouble(i);
        if (d > threshold)
            expectedWeightGrad.putScalar(i, threshold);
        else if (d < -threshold)
            expectedWeightGrad.putScalar(i, -threshold);
    }
    INDArray expectedBiasGrad = biasGradCopy.dup();
    for (int i = 0; i < expectedBiasGrad.length(); i++) {
        double d = expectedBiasGrad.getDouble(i);
        if (d > threshold)
            expectedBiasGrad.putScalar(i, threshold);
        else if (d < -threshold)
            expectedBiasGrad.putScalar(i, -threshold);
    }
    assertEquals(expectedWeightGrad, gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY));
    assertEquals(expectedBiasGrad, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}
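The per-element loops above implement a scalar clamp to [-threshold, threshold]. An equivalent vectorized form, as a sketch assuming ND4J's Transforms scalar min/max ops:

import org.nd4j.linalg.ops.transforms.Transforms;

// Element-wise clamp of a gradient to [-threshold, threshold], equivalent
// to the explicit loops in the test above.
INDArray clamped = Transforms.min(Transforms.max(weightGradCopy.dup(), -threshold), threshold);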