Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestGradientNormalization, method testRenormalizatonPerLayer.
/**
 * Checks {@code GradientNormalization.RenormalizeL2PerLayer} on a single dense layer.
 *
 * <p>Random weight and bias gradients are pushed through the layer's updater (configured with
 * {@code Updater.NONE}). Afterwards each gradient is expected to equal its original value
 * divided by one shared L2 norm computed over the weight and bias gradients together.
 */
@Test
public void testRenormalizatonPerLayer() {
    Nd4j.getRandom().setSeed(12345);

    final int numIn = 10;
    final int numOut = 20;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder().nIn(numIn).nOut(numOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    INDArray weightGrad = Nd4j.rand(numIn, numOut);
    // Sized by numOut so the bias gradient lines up with the layer's bias (one value per output).
    INDArray biasGrad = Nd4j.rand(1, numOut);

    // Keep untouched copies: the assertions below rely on the update changing the arrays in place.
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();

    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);

    updater.update(layer, gradient, 0, 1);

    assertNotEquals(weightGradCopy, weightGrad);
    assertNotEquals(biasGradCopy, biasGrad);

    // Layer-wide L2 norm: sqrt of the summed squares of every gradient entry in the layer.
    double sumSquaresWeight = weightGradCopy.mul(weightGradCopy).sumNumber().doubleValue();
    double sumSquaresBias = biasGradCopy.mul(biasGradCopy).sumNumber().doubleValue();
    double l2Layer = Math.sqrt(sumSquaresWeight + sumSquaresBias);

    INDArray normWeightsExpected = weightGradCopy.div(l2Layer);
    INDArray normBiasExpected = biasGradCopy.div(l2Layer);

    INDArray weightGradActual = gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY);
    INDArray biasGradActual = gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY);

    // Sanity check: normalised gradients must be finite and non-zero.
    double l2Weight = weightGradActual.norm2Number().doubleValue();
    double l2Bias = biasGradActual.norm2Number().doubleValue();
    assertTrue(!Double.isNaN(l2Weight) && l2Weight > 0.0);
    assertTrue(!Double.isNaN(l2Bias) && l2Bias > 0.0);

    assertEquals(normWeightsExpected, weightGradActual);
    assertEquals(normBiasExpected, biasGradActual);
}
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestGradientNormalization, method testRenormalizationPerParamType.
/**
 * Checks {@code GradientNormalization.RenormalizeL2PerParamType} on a single dense layer.
 *
 * <p>After the update, the weight gradient is expected to equal its original value divided by
 * its own L2 norm, and likewise for the bias gradient (each parameter type is normalised
 * separately).
 */
@Test
public void testRenormalizationPerParamType() {
    Nd4j.getRandom().setSeed(12345);

    final int numIn = 10;
    final int numOut = 20;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new DenseLayer.Builder().nIn(numIn).nOut(numOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .gradientNormalization(GradientNormalization.RenormalizeL2PerParamType)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    INDArray weightGrad = Nd4j.rand(numIn, numOut);
    // Sized by numOut so the bias gradient lines up with the layer's bias (one value per output).
    INDArray biasGrad = Nd4j.rand(1, numOut);

    // Snapshot the raw gradients before the updater gets a chance to modify them.
    INDArray weightGradCopy = weightGrad.dup();
    INDArray biasGradCopy = biasGrad.dup();

    Gradient gradient = new DefaultGradient();
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGrad);

    updater.update(layer, gradient, 0, 1);

    INDArray normWeightsExpected = weightGradCopy.div(weightGradCopy.norm2Number());
    INDArray normBiasExpected = biasGradCopy.div(biasGradCopy.norm2Number());

    assertEquals(normWeightsExpected, gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY));
    assertEquals(normBiasExpected, gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY));
}
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestUpdaters, method testRMSPropUpdater.
/**
 * Checks the RMSProp updater on a single dense layer.
 *
 * <p>For every parameter the expected update is recomputed by hand as
 * {@code g * lr / sqrt(cache + epsilon)} with
 * {@code cache = rmsDecay * cache + (1 - rmsDecay) * g^2} (cache starting at zero), and compared
 * with what the updater left in the shared {@code gradient}.
 */
@Test
public void testRMSPropUpdater() {
    double lr = 0.01;
    double rmsDecay = 0.25;
    double epsilon = 1e-8;
    Map<String, INDArray> lastG = new HashMap<>();

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .rmsDecay(rmsDecay)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);

    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    // Copy the raw gradients BEFORE running the updater. Copying afterwards would make the
    // expectation depend on whether the updater happens to modify the arrays in place.
    Gradient gradientCopyPreUpdate = new DefaultGradient();
    gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    updater.update(layer, gradient, -1, 1);

    for (Map.Entry<String, INDArray> entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) {
        String paramName = entry.getKey();
        INDArray rawGrad = entry.getValue();

        // Running average of squared gradients; zero on the first (and only) step.
        INDArray cache = lastG.get(paramName);
        if (cache == null) {
            cache = Nd4j.zeros(rawGrad.shape());
        }
        cache.muli(rmsDecay).addi(rawGrad.mul(rawGrad).muli(1 - rmsDecay));

        INDArray expected = rawGrad.mul(lr).div(Transforms.sqrt(cache.add(epsilon)));
        assertEquals(expected, gradient.getGradientFor(paramName));

        lastG.put(paramName, cache);
    }

    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
}
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestUpdaters, method testSGDUpdater.
/**
 * Checks the SGD updater on a single dense layer: after the update every gradient is expected to
 * equal the original gradient multiplied by the learning rate.
 */
@Test
public void testSGDUpdater() {
    double lr = 0.05;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    // Copy the raw gradients BEFORE running the updater. Copying afterwards would make the
    // expectation depend on whether the updater happens to modify the arrays in place.
    Gradient gradientCopyPreUpdate = new DefaultGradient();
    gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientCopyPreUpdate.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    updater.update(layer, gradient, -1, 1);

    for (Map.Entry<String, INDArray> entry : gradientCopyPreUpdate.gradientForVariable().entrySet()) {
        INDArray expected = entry.getValue().mul(lr);
        assertEquals(expected, gradient.getGradientFor(entry.getKey()));
    }

    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestUpdaters, method testMultiLayerUpdater.
/**
 * Checks that the updater created for a four-layer network is a {@code MultiLayerUpdater} that
 * delegates to one {@code LayerUpdater} per layer, each using the gradient updater configured
 * for that layer (SGD, NONE, ADAGRAD, NESTEROVS).
 *
 * <p>It then feeds identical random gradients through the network-level updater and through
 * four independently constructed {@code LayerUpdater}s for five iterations, and expects both
 * paths to produce the same gradients.
 *
 * @throws Exception if the private {@code layerUpdaters} field cannot be read reflectively
 */
@Test
public void testMultiLayerUpdater() throws Exception {
    Nd4j.getRandom().setSeed(12345L);
    double lr = 0.03;

    // Per-layer input/output sizes; must agree with the layer definitions below.
    int[] nIns = {4, 5, 6, 7};
    int[] nOuts = {5, 6, 7, 8};

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .momentum(0.6)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(5)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .layer(1, new DenseLayer.Builder().nIn(5).nOut(6)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
            .layer(2, new DenseLayer.Builder().nIn(6).nOut(7)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
            .layer(3, new DenseLayer.Builder().nIn(7).nOut(8)
                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    Updater updater = UpdaterCreator.getUpdater(net);
    assertNotNull(updater);
    assertEquals(MultiLayerUpdater.class, updater.getClass());

    // The per-layer updaters are private, so read them reflectively.
    Field f = MultiLayerUpdater.class.getDeclaredField("layerUpdaters");
    f.setAccessible(true);
    Updater[] updaters = (Updater[]) f.get(updater);
    assertNotNull(updaters);
    assertEquals(net.getnLayers(), updaters.length);

    int count = 0;
    for (Updater layerUpdater : updaters) {
        assertTrue(layerUpdater instanceof LayerUpdater);
        LayerUpdater lu = (LayerUpdater) layerUpdater;
        for (GradientUpdater gu : lu.updaterForVariable.values()) {
            switch (count) {
                case 0:
                    assertTrue(gu instanceof Sgd);
                    break;
                case 1:
                    assertTrue(gu instanceof org.nd4j.linalg.learning.NoOpUpdater);
                    break;
                case 2:
                    assertTrue(gu instanceof AdaGrad);
                    break;
                case 3:
                    assertTrue(gu instanceof Nesterovs);
                    break;
                default:
                    throw new RuntimeException("Unexpected layer index: " + count);
            }
        }
        count++;
    }

    // Stand-alone layer updaters used to compute the expected result. Only layers 2 and 3 are
    // given a state view array here, sized as (weights + biases) for that layer.
    Updater[] uArr = new Updater[4];
    uArr[0] = new LayerUpdater();
    uArr[1] = new LayerUpdater();
    uArr[2] = new LayerUpdater();
    INDArray updaterState = Nd4j.create(1, nIns[2] * nOuts[2] + nOuts[2], 'f');
    uArr[2].setStateViewArray(net.getLayer(2), updaterState, true);
    uArr[3] = new LayerUpdater();
    updaterState = Nd4j.create(1, nIns[3] * nOuts[3] + nOuts[3], 'f');
    uArr[3].setStateViewArray(net.getLayer(3), updaterState, true);

    for (int i = 0; i < 5; i++) {
        Gradient gradient = new DefaultGradient();
        Map<String, INDArray> expectedGradient = new LinkedHashMap<>();

        for (int j = 0; j < net.getnLayers(); j++) {
            // Generate test gradient:
            INDArray wGrad = Nd4j.rand(nIns[j], nOuts[j]);
            INDArray bGrad = Nd4j.rand(1, nOuts[j]);

            String wKey = j + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = j + "_" + DefaultParamInitializer.BIAS_KEY;
            gradient.setGradientFor(wKey, wGrad);
            gradient.setGradientFor(bKey, bGrad);

            // Also put a copy of the gradient through the separate layer updater to compare.
            Gradient layerGradient = new DefaultGradient();
            layerGradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wGrad.dup());
            layerGradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, bGrad.dup());
            uArr[j].update(net.getLayer(j), layerGradient, i, 1);

            for (String s : layerGradient.gradientForVariable().keySet()) {
                expectedGradient.put(j + "_" + s, layerGradient.getGradientFor(s));
            }
        }

        updater.update(net, gradient, i, 1);
        assertEquals(expectedGradient, gradient.gradientForVariable());
    }
}
Aggregations