Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testRMSPropUpdater.
@Test
public void testRMSPropUpdater() {
    double lr = 0.01;
    double rmsDecay = 0.25;
    Map<String, INDArray> lastG = new HashMap<>();

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).rmsDecay(rmsDecay)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP).build())
                    .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    // 'gradient', 'weightGradient', 'biasGradient', 'nIn' and 'nOut' are fields of TestUpdaters,
    // initialised in the test set-up.
    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    double epsilon = 1e-8;
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        String key = entry.getKey();
        INDArray val = entry.getValue();
        INDArray lastGTmp = lastG.get(key);
        if (lastGTmp == null)
            lastGTmp = Nd4j.zeros(val.shape());
        lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
        INDArray gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(epsilon)));
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
        lastG.put(key, lastGTmp);
    }
    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
}
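For reference, the loop above recomputes the standard RMSProp rule (a sketch of the usual formulation; the symbols are mine, not taken from the DL4J source):

    r_t = \rho\, r_{t-1} + (1 - \rho)\, g_t^2, \qquad \Delta\theta_t = \frac{\eta\, g_t}{\sqrt{r_t + \epsilon}}

applied element-wise per parameter array, with \rho = rmsDecay = 0.25, \eta = lr = 0.01, \epsilon = 1e-8, and r_t accumulated in the lastG map.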
Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testSGDUpdater.
@Test
public void testSGDUpdater() {
    double lr = 0.05;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    // 'gradient', 'weightGradient' and 'biasGradient' are fields of TestUpdaters.
    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        INDArray gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
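The expected value in this loop is plain SGD scaling (stated here only for completeness):

    \Delta\theta = \eta\, g, \qquad \eta = lr = 0.05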
Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testMultiLayerUpdater.
@Test
public void testMultiLayerUpdater() throws Exception {
    Nd4j.getRandom().setSeed(12345L);
    double lr = 0.03;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(0.6)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .layer(1, new DenseLayer.Builder().nIn(5).nOut(6).updater(org.deeplearning4j.nn.conf.Updater.NONE).build())
                    .layer(2, new DenseLayer.Builder().nIn(6).nOut(7).updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
                    .layer(3, new DenseLayer.Builder().nIn(7).nOut(8).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    Updater updater = UpdaterCreator.getUpdater(net);
    assertNotNull(updater);
    assertTrue(updater.getClass() == MultiLayerUpdater.class);

    Field f = MultiLayerUpdater.class.getDeclaredField("layerUpdaters");
    f.setAccessible(true);
    Updater[] updaters = (Updater[]) f.get(updater);
    assertNotNull(updaters);
    assertTrue(updaters.length == net.getnLayers());
    assertTrue(updaters[0] instanceof LayerUpdater);
    assertTrue(updaters[1] instanceof LayerUpdater);
    assertTrue(updaters[2] instanceof LayerUpdater);
    assertTrue(updaters[3] instanceof LayerUpdater);

    int count = 0;
    for (Updater u : updaters) {
        LayerUpdater lu = (LayerUpdater) u;
        for (GradientUpdater gu : lu.updaterForVariable.values()) {
            switch (count) {
                case 0:
                    assertTrue(gu instanceof Sgd);
                    break;
                case 1:
                    assertTrue(gu instanceof org.nd4j.linalg.learning.NoOpUpdater);
                    break;
                case 2:
                    assertTrue(gu instanceof AdaGrad);
                    break;
                case 3:
                    assertTrue(gu instanceof Nesterovs);
                    break;
                default:
                    throw new RuntimeException();
            }
        }
        count++;
    }

    LayerUpdater u = (LayerUpdater) updaters[0];

    //Separate per-layer updaters to compare against; SGD and NONE keep no state, so only
    //layers 2 (ADAGRAD) and 3 (NESTEROVS) need state view arrays.
    Updater[] uArr = new Updater[4];
    uArr[0] = new LayerUpdater();
    uArr[1] = new LayerUpdater();
    uArr[2] = new LayerUpdater();
    INDArray updaterState = Nd4j.create(1, 6 * 7 + 7, 'f');
    uArr[2].setStateViewArray(net.getLayer(2), updaterState, true);
    uArr[3] = new LayerUpdater();
    //updaterStateSize = uArr[3].stateSizeForLayer(net.getLayer(3));
    updaterState = Nd4j.create(1, 7 * 8 + 8, 'f');
    uArr[3].setStateViewArray(net.getLayer(3), updaterState, true);

    int[] nIns = {4, 5, 6, 7};
    int[] nOuts = {5, 6, 7, 8};

    for (int i = 0; i < 5; i++) {
        Gradient gradient = new DefaultGradient();
        Map<String, INDArray> expectedGradient = new LinkedHashMap<>();

        for (int j = 0; j < net.getnLayers(); j++) {
            //Generate test gradient:
            INDArray wGrad = Nd4j.rand(nIns[j], nOuts[j]);
            INDArray bGrad = Nd4j.rand(1, nOuts[j]);
            String wKey = j + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = j + "_" + DefaultParamInitializer.BIAS_KEY;
            gradient.setGradientFor(wKey, wGrad);
            gradient.setGradientFor(bKey, bGrad);

            //Also put copy of gradient through separate layer updaters to compare
            Gradient layerGradient = new DefaultGradient();
            layerGradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, wGrad.dup());
            layerGradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, bGrad.dup());

            uArr[j].update(net.getLayer(j), layerGradient, i, 1);
            for (String s : layerGradient.gradientForVariable().keySet()) {
                expectedGradient.put(j + "_" + s, layerGradient.getGradientFor(s));
            }
        }

        updater.update(net, gradient, i, 1);
        assertEquals(gradient.gradientForVariable(), expectedGradient);
    }
}
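The two view-array sizes hard-coded above follow from the per-layer parameter counts: AdaGrad and Nesterovs keep one state element per parameter (weights plus biases). As a worked check (not from the source):

    n_{in} \cdot n_{out} + n_{out} = 6 \cdot 7 + 7 = 49 \ \text{(layer 2)}, \qquad 7 \cdot 8 + 8 = 64 \ \text{(layer 3)}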
Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testPretrain.
@Test
public void testPretrain() {
    double lr = 0.05;

    // 'gradient', 'weightGradient', 'biasGradient' and 'vbiasGradient' are fields of TestUpdaters.
    gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());

    Gradient gradientDup2 = new DefaultGradient();
    gradientDup2.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup2.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup2.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).seed(42)
                    .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
                                    .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY)
                                    .activation(Activation.IDENTITY)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                                    .nIn(nIn).nOut(nOut).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);

    boolean preTrain = true;
    conf.setPretrain(preTrain);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        INDArray gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);

    //Without pretrain, the visible bias ("vb") gradient is expected to be left unchanged
    preTrain = false;
    conf.setPretrain(preTrain);
    gradient = gradientDup;
    params = Nd4j.create(1, numParams);
    layer = conf.getLayer().instantiate(conf, null, 0, params, true);

    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup2.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        INDArray gradExpected;
        if (!entry.getKey().equals("vb"))
            gradExpected = val.mul(lr);
        else
            gradExpected = val;
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
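The check against the literal key "vb" above relies on the short parameter keys used by DefaultParamInitializer and PretrainParamInitializer. A minimal sketch of assembling such a pretrain gradient, assuming the usual key values (not taken from this test):

    // Minimal sketch, assuming WEIGHT_KEY = "W", BIAS_KEY = "b" and VISIBLE_BIAS_KEY = "vb".
    Gradient g = new DefaultGradient();
    g.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, Nd4j.rand(nIn, nOut));      // "W": weight gradient
    g.setGradientFor(DefaultParamInitializer.BIAS_KEY, Nd4j.rand(1, nOut));          // "b": hidden bias gradient
    g.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, Nd4j.rand(1, nIn));  // "vb": visible bias gradient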
Use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testAdaDeltaUpdate.
@Test
public void testAdaDeltaUpdate() {
    INDArray dxSquared;
    Map<String, INDArray> msg = new HashMap<>();
    Map<String, INDArray> msdx = new HashMap<>();
    double rho = 0.85;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().rho(rho)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                                    .epsilon(Nd4j.EPS_THRESHOLD).build())
                    .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    // 'gradient', 'weightGradient' and 'biasGradient' are fields of TestUpdaters.
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (int i = 0; i < 2; i++) {
        updater.update(layer, gradient, i, 1);

        //Manually recompute the AdaDelta update from the duplicated gradient and compare
        for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
            String key = entry.getKey();
            INDArray val = entry.getValue();
            INDArray msgTmp = msg.get(key);
            INDArray msdxTmp = msdx.get(key);
            if (msgTmp == null) {
                msgTmp = Nd4j.zeros(val.shape());
                msdxTmp = Nd4j.zeros(val.shape());
            }

            msgTmp.muli(rho);
            msgTmp.addi(val.mul(val).muli(1 - rho));

            INDArray gradExpected = Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
                            .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD))).muli(val);
            gradientDup.setGradientFor(key, gradExpected);
            assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));

            msdxTmp.muli(rho);
            dxSquared = gradExpected.mul(gradExpected);
            msdxTmp.addi(dxSquared.muli(1 - rho));

            msg.put(key, msgTmp);
            msdx.put(key, msdxTmp);
        }
        assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
}
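The loop above mirrors the AdaDelta update (a sketch of the usual formulation; the symbols are mine, with msg tracking E[g^2] and msdx tracking E[\Delta\theta^2]):

    E[g^2]_t = \rho\, E[g^2]_{t-1} + (1 - \rho)\, g_t^2
    \Delta\theta_t = \frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\, g_t
    E[\Delta\theta^2]_t = \rho\, E[\Delta\theta^2]_{t-1} + (1 - \rho)\, \Delta\theta_t^2

with \rho = 0.85 and \epsilon = Nd4j.EPS_THRESHOLD.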