Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
In class TestDecayPolicies, the method testLearningRateStepDecaySingleLayer:
@Test
public void testLearningRateStepDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double steps = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.Step)
                    .lrPolicyDecayRate(decayRate)
                    .lrPolicySteps(steps)
                    .iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    // weightGradient and biasGradient are INDArray fields of the test class
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        // After each update, the Step policy should have rescaled the per-parameter learning rates
        double expectedLr = calcStepDecay(lr, decayRate, i, steps);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
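calcStepDecay is a helper defined elsewhere in TestDecayPolicies. A minimal sketch of what it presumably computes, assuming DL4J's Step policy scales the base rate by decayRate raised to floor(iteration / steps); the signature here is illustrative, not copied from the project:

// Illustrative sketch of the step-decay helper referenced above; the real
// calcStepDecay lives in TestDecayPolicies. Assumed rule:
// lr * decayRate^floor(iteration / steps)
private static double calcStepDecay(double lr, double decayRate, double iteration, double steps) {
    return lr * Math.pow(decayRate, Math.floor(iteration / steps));
}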
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
In class TestUpdaters, the method testAdaGradUpdater:
@Test
public void testAdaGradUpdater() {
    double lr = 1e-2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    // gradient, weightGradient and biasGradient are fields of the test class
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        // Expected AdaGrad scaling after one step: lr / sqrt(g^2 + epsilon) * g
        INDArray gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
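The rule the loop verifies is easier to read in scalar form: AdaGrad divides each gradient element by the square root of its accumulated squared history, and after a single update that history is just g^2, so the expected result is lr / sqrt(g^2 + epsilon) * g. A standalone sketch of that arithmetic (my own illustration, not part of the test):

// Scalar form of the AdaGrad check above, valid for the first update only,
// when the accumulated squared gradient equals g * g.
static double adaGradFirstStep(double g, double lr, double eps) {
    double history = g * g;                   // sum of squared gradients so far
    return lr / Math.sqrt(history + eps) * g; // element-wise adjusted gradient
}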
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
In class TestUpdaters, the method testEpsilonAllUpdaters:
@Test
public void testEpsilonAllUpdaters() {
    double e = 7e-2;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().epsilon(e).list()
                    .layer(0, new DenseLayer.Builder().nIn(2).nOut(2)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
                    .layer(1, new DenseLayer.Builder().nIn(2).nOut(2)
                                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build())
                    .layer(2, new DenseLayer.Builder().nIn(2).nOut(2)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).build())
                    .layer(3, new DenseLayer.Builder().nIn(2).nOut(2)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    MultiLayerUpdater updater = (MultiLayerUpdater) net.getUpdater();
    Updater[] updaters = updater.getLayerUpdaters();
    // updaterForVariable maps parameter names ("W", "b") to the per-parameter learning-rule instances;
    // the globally configured epsilon should have propagated to every one of them
    LayerUpdater u0 = (LayerUpdater) updaters[0];
    Adam adam = (Adam) u0.updaterForVariable.get("W");
    assertEquals(e, adam.getEpsilon(), 0.0);
    LayerUpdater u1 = (LayerUpdater) updaters[1];
    RmsProp rmsProp = (RmsProp) u1.updaterForVariable.get("W");
    assertEquals(e, rmsProp.getEpsilon(), 0.0);
    LayerUpdater u2 = (LayerUpdater) updaters[2];
    AdaDelta adaDelta = (AdaDelta) u2.updaterForVariable.get("W");
    assertEquals(e, adaDelta.getEpsilon(), 0.0);
    LayerUpdater u3 = (LayerUpdater) updaters[3];
    AdaGrad adaGrad = (AdaGrad) u3.updaterForVariable.get("W");
    assertEquals(e, adaGrad.getEpsilon(), 0.0);
}
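The same lookup pattern works for any layer and parameter: cast the network's updater to MultiLayerUpdater, pick a LayerUpdater, and index its updaterForVariable map by parameter key. A hedged fragment continuing from the net above, assuming (as the test's casts suggest) that org.nd4j.linalg.learning.GradientUpdater is the common supertype of Adam, RmsProp, AdaDelta, and AdaGrad in this 0.x-era API:

// Generic lookup of the per-parameter learning rule for layer 0's weights;
// mirrors what the test does with its explicit casts to Adam etc.
MultiLayerUpdater mlu = (MultiLayerUpdater) net.getUpdater();
LayerUpdater first = (LayerUpdater) mlu.getLayerUpdaters()[0];
GradientUpdater rule = first.updaterForVariable.get(DefaultParamInitializer.WEIGHT_KEY);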
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
In class TestUpdaters, the method testNestorovsUpdater:
@Test
public void testNestorovsUpdater() {
    double lr = 1e-2;
    double mu = 0.6;
    INDArray v, vPrev;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr).momentum(mu)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        // Recompute the expected first Nesterov step from scratch (velocity starts at zero)
        v = Nd4j.zeros(val.shape());
        vPrev = v;
        v = vPrev.mul(mu).subi(val.mul(lr));            // v = mu * vPrev - lr * g
        INDArray gradExpected = vPrev.muli(mu).addi(v.mul(-mu - 1)); // mu * vPrev - (1 + mu) * v
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
}
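In scalar form, the expected step the loop computes is: with v0 = 0, the velocity becomes v1 = mu * v0 - lr * g, and the applied update is mu * v0 - (1 + mu) * v1, which for the first step reduces to (1 + mu) * lr * g. A standalone sketch of that arithmetic (illustrative only):

// First-step Nesterov momentum arithmetic mirrored from the loop above.
static double nesterovFirstStep(double g, double lr, double mu) {
    double vPrev = 0.0;
    double v = mu * vPrev - lr * g;     // velocity update
    return mu * vPrev - (1.0 + mu) * v; // applied update; here (1 + mu) * lr * g
}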
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
In class TestUpdaters, the method testAdamUpdater:
@Test
public void testAdamUpdater() {
    INDArray m, v;
    double lr = 0.01;
    int iteration = 0;
    double beta1 = 0.8;
    double beta2 = 0.888;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr).iterations(iteration)
                    .adamMeanDecay(beta1).adamVarDecay(beta2)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, iteration, 1);
    // Bias-corrected step size for step t = iteration + 1
    double beta1t = FastMath.pow(beta1, iteration + 1);
    double beta2t = FastMath.pow(beta2, iteration + 1);
    double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
    if (Double.isNaN(alphat) || alphat == 0.0) {
        alphat = epsilon;
    }
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        m = Nd4j.zeros(val.shape());
        v = Nd4j.zeros(val.shape());
        m.muli(beta1).addi(val.mul(1.0 - beta1));          // m = beta1 * m + (1 - beta1) * g
        v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2)); // v = beta2 * v + (1 - beta2) * g^2
        INDArray gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon));
        // Print both sides on mismatch to aid debugging before the assertion fails
        if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) {
            System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
            System.out.println(Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
        }
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
    assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
}
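The loop's Adam arithmetic also reduces to a simple scalar form for the first step: m and v start at zero, so m1 = (1 - beta1) * g and v1 = (1 - beta2) * g^2, and the bias-corrected update is alphat * m1 / (sqrt(v1) + epsilon) with alphat = lr * sqrt(1 - beta2^t) / (1 - beta1^t) at t = 1. A standalone sketch (illustrative, not the test's code):

// First-step Adam update mirrored from the loop above (t = 1).
static double adamFirstStep(double g, double lr, double beta1, double beta2, double eps) {
    double m = (1.0 - beta1) * g;                                // first moment
    double v = (1.0 - beta2) * g * g;                            // second moment
    double alphat = lr * Math.sqrt(1.0 - beta2) / (1.0 - beta1); // bias-corrected step size
    return alphat * m / (Math.sqrt(v) + eps);
}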