Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testAdaGradUpdater:
@Test
public void testAdaGradUpdater() {
    double lr = 1e-2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
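For reference, the expected value computed in the loop is just a first AdaGrad step applied to an empty gradient accumulator: element-wise, gradExpected = lr * g / sqrt(g * g + epsilon), where g is the raw weight or bias gradient held in the test's gradient fields. The assertion checks that the updater produced exactly this value for each parameter.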
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testNestorovsUpdater:
@Test
public void testNestorovsUpdater() {
    double lr = 1e-2;
    double mu = 0.6;
    INDArray v, vPrev;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, -1, 1);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        v = Nd4j.zeros(val.shape());
        vPrev = v;
        v = vPrev.mul(mu).subi(val.mul(lr));
        gradExpected = vPrev.muli(mu).addi(v.mul(-mu - 1));
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
}
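For reference, the loop reproduces the first Nesterov-momentum step from zero velocity: with vPrev = 0 and v = mu * vPrev - lr * g, the expression mu * vPrev - (1 + mu) * v simplifies to (1 + mu) * lr * g, and that is the value asserted against the updater's output for each parameter.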
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testAdamUpdater:
@Test
public void testAdamUpdater() {
    INDArray m, v;
    double lr = 0.01;
    int iteration = 0;
    double beta1 = 0.8;
    double beta2 = 0.888;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).iterations(iteration)
                    .adamMeanDecay(beta1).adamVarDecay(beta2)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    updater.update(layer, gradient, iteration, 1);
    double beta1t = FastMath.pow(beta1, iteration + 1);
    double beta2t = FastMath.pow(beta2, iteration + 1);
    double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
    if (Double.isNaN(alphat) || alphat == 0.0)
        alphat = epsilon;
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        m = Nd4j.zeros(val.shape());
        v = Nd4j.zeros(val.shape());
        m.muli(beta1).addi(val.mul(1.0 - beta1));
        v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
        gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon));
        if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) {
            System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
            System.out.println(Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
        }
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
    assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
}
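For reference, with m and v initialised to zero the expected first Adam step computed above is: m = (1 - beta1) * g and v = (1 - beta2) * g * g (element-wise), alphat = lr * sqrt(1 - beta2^t) / (1 - beta1^t) with t = iteration + 1, and gradExpected = alphat * m / (sqrt(v) + epsilon). This is the standard Adam update for a single iteration, which the loop asserts against the updater's output.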
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestDecayPolicies, method testLearningRateTorchStepDecaySingleLayer:
@Test
public void testLearningRateTorchStepDecaySingleLayer() {
    int iterations = 20;
    double lr = 1;
    double decayRate = .5;
    double steps = 10;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.TorchStep).lrPolicyDecayRate(decayRate)
                    .lrPolicySteps(steps).iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    double expectedLr = lr;
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        if (i > 1 && steps % i == 0)
            expectedLr = calcTorchStepDecay(expectedLr, decayRate);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
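Note that calcTorchStepDecay is a helper defined elsewhere in TestDecayPolicies; judging from the TorchStep policy it exercises, it presumably returns the previous rate multiplied by the decay rate (expectedLr * decayRate). The assertions then check that both the "W" and "b" parameters report the same decayed learning rate after each update.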
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestDecayPolicies, method testLearningRateScheduleSingleLayer:
@Test
public void testLearningRateScheduleSingleLayer() {
    Map<Integer, Double> learningRateAfter = new HashMap<>();
    learningRateAfter.put(1, 0.2);
    int iterations = 2;
    for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
        double lr = 1e-2;
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                        .learningRateSchedule(learningRateAfter)
                        .learningRateDecayPolicy(LearningRatePolicy.Schedule).iterations(iterations)
                        .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(updaterFunc).build())
                        .build();
        int numParams = conf.getLayer().initializer().numParams(conf);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);
        int stateSize = updater.stateSizeForLayer(layer);
        if (stateSize > 0)
            updater.setStateViewArray(layer, Nd4j.create(1, stateSize), true);
        Gradient gradientActual = new DefaultGradient();
        gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
        gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
        Gradient gradientExpected = new DefaultGradient();
        gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
        gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
        for (int i = 0; i < 2; i++) {
            updater.update(layer, gradientActual, i, 1);
            if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
                lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
                lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
                lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
                lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
            assertEquals(lr, layer.conf().getLearningRateByParam("W"), 1e-4);
        }
    }
}
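As a rough sketch (not part of the original test), the Schedule policy swaps in a new learning rate when the current iteration appears as a key in the schedule map; a hypothetical helper capturing that behaviour might look like:

static double scheduledRate(Map<Integer, Double> schedule, int iteration, double currentLr) {
    // Hypothetical illustration of LearningRatePolicy.Schedule semantics:
    // switch to the mapped rate at the listed iteration, otherwise keep the current rate.
    return schedule.containsKey(iteration) ? schedule.get(iteration) : currentLr;
}

With learningRateAfter mapping iteration 1 to 0.2 and two iterations in total, the rate stays at 1e-2 for iteration 0 and becomes 0.2 from iteration 1 on, which the per-updater test*Computation helpers verify for SGD, AdaGrad, Adam, and RMSProp.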