Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestUpdaters, method testPretrain.
@Test
public void testPretrain() {
double lr = 0.05;
// Note: fields such as gradient, weightGradient, biasGradient, vbiasGradient, nIn, nOut, val and gradExpected are not declared in this snippet; they come from the enclosing test class.
gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
Gradient gradientDup = new DefaultGradient();
gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
gradientDup.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
Gradient gradientDup2 = new DefaultGradient();
// gradientDup2 holds unscaled copies used as the reference values for the second (pretrain = false) pass
gradientDup2.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
gradientDup2.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
gradientDup2.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).seed(42)
        .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
                .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY).activation(Activation.IDENTITY)
                .updater(org.deeplearning4j.nn.conf.Updater.SGD).nIn(nIn).nOut(nOut).build())
        .build();
int numParams = conf.getLayer().initializer().numParams(conf);
boolean preTrain = true;
conf.setPretrain(preTrain);
INDArray params = Nd4j.create(1, numParams);
Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
Updater updater = UpdaterCreator.getUpdater(layer);
updater.update(layer, gradient, -1, 1);
for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
val = entry.getValue();
gradExpected = val.mul(lr);
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
}
assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
preTrain = false;
conf.setPretrain(preTrain);
gradient = gradientDup;
params = Nd4j.create(1, numParams);
layer = conf.getLayer().instantiate(conf, null, 0, params, true);
updater.update(layer, gradient, -1, 1);
for (Map.Entry<String, INDArray> entry : gradientDup2.gradientForVariable().entrySet()) {
val = entry.getValue();
if (!entry.getKey().equals("vb"))
gradExpected = val.mul(lr);
else
gradExpected = val;
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
}
assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
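For reference, the behavior asserted above is a plain SGD step: each gradient is scaled by the learning rate, and when pretrain is false the RBM's visible-bias gradient ("vb") is left unscaled. A minimal sketch of that expectation, assuming a layer and gradient prepared as in the test (the names rawGradient and weightGrad are hypothetical):
Updater sgdUpdater = UpdaterCreator.getUpdater(layer);      // layer configured with Updater.SGD, lr = 0.05
sgdUpdater.update(layer, rawGradient, 0, 1);                 // scales the gradient views in place
INDArray expectedW = weightGrad.mul(0.05);                   // plain SGD expectation: lr * gradient
assertEquals(expectedW, rawGradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY));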
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestUpdaters, method testAdaDeltaUpdate.
@Test
public void testAdaDeltaUpdate() {
INDArray dxSquared;
Map<String, INDArray> msg = new HashMap<>();
Map<String, INDArray> msdx = new HashMap<>();
double rho = 0.85;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().rho(rho)
        .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).epsilon(Nd4j.EPS_THRESHOLD).build())
        .build();
int numParams = conf.getLayer().initializer().numParams(conf);
INDArray params = Nd4j.create(1, numParams);
Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
Updater updater = UpdaterCreator.getUpdater(layer);
int updaterStateSize = updater.stateSizeForLayer(layer);
INDArray updaterState = Nd4j.create(1, updaterStateSize);
updater.setStateViewArray(layer, updaterState, true);
Gradient gradientDup = new DefaultGradient();
gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
for (int i = 0; i < 2; i++) {
updater.update(layer, gradient, i, 1);
for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
key = entry.getKey();
val = entry.getValue();
INDArray msgTmp = msg.get(key);
INDArray msdxTmp = msdx.get(key);
if (msgTmp == null) {
msgTmp = Nd4j.zeros(val.shape());
msdxTmp = Nd4j.zeros(val.shape());
}
msgTmp.muli(rho);
msgTmp.addi(val.mul(val).muli(1 - rho));
gradExpected = Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD)).divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD))).muli(val);
gradientDup.setGradientFor(key, gradExpected);
assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
msdxTmp.muli(rho);
dxSquared = gradExpected.mul(gradExpected);
msdxTmp.addi(dxSquared.muli(1 - rho));
msg.put(key, msgTmp);
msdx.put(key, msdxTmp);
}
assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
}
}
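The expected values computed in the loop above follow the standard AdaDelta recurrences. A self-contained sketch using the same Nd4j operations (the helper name adaDeltaStep and its signature are hypothetical, not part of the test):
// msg  <- rho * msg  + (1 - rho) * g^2             accumulated squared gradient
// dx   <- sqrt(msdx + eps) / sqrt(msg + eps) * g    scaled update applied to the parameters
// msdx <- rho * msdx + (1 - rho) * dx^2             accumulated squared update
static INDArray adaDeltaStep(INDArray g, INDArray msg, INDArray msdx, double rho, double eps) {
    msg.muli(rho).addi(g.mul(g).muli(1 - rho));
    INDArray dx = Transforms.sqrt(msdx.add(eps)).divi(Transforms.sqrt(msg.add(eps))).muli(g);
    msdx.muli(rho).addi(dx.mul(dx).muli(1 - rho));
    return dx;
}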
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class TestUpdaters, method testEpsilon.
@Test
public void testEpsilon() {
//Test epsilon setting - adagrad
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).list()
        .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
        .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build())
        .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build())
        .build();
assertEquals(1e-6, conf.getConf(0).getLayer().getEpsilon(), 0.0);
assertEquals(0.123, conf.getConf(1).getLayer().getEpsilon(), 0.0);
assertEquals(0.456, conf.getConf(2).getLayer().getEpsilon(), 0.0);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerUpdater updater = (MultiLayerUpdater) net.getUpdater();
Updater[] updaters = updater.getLayerUpdaters();
LayerUpdater u0 = (LayerUpdater) updaters[0];
AdaGrad adaGrad = (AdaGrad) u0.updaterForVariable.get("W");
assertEquals(1e-6, adaGrad.getEpsilon(), 0.0);
LayerUpdater u1 = (LayerUpdater) updaters[1];
AdaGrad adaGrad1 = (AdaGrad) u1.updaterForVariable.get("W");
assertEquals(0.123, adaGrad1.getEpsilon(), 0.0);
LayerUpdater u2 = (LayerUpdater) updaters[2];
AdaGrad adaGrad2 = (AdaGrad) u2.updaterForVariable.get("W");
assertEquals(0.456, adaGrad2.getEpsilon(), 0.0);
//Test epsilon setting - adadelta
conf = new NeuralNetConfiguration.Builder().updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).list()
        .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
        .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build())
        .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build())
        .build();
assertEquals(1e-6, conf.getConf(0).getLayer().getEpsilon(), 0.0);
assertEquals(0.123, conf.getConf(1).getLayer().getEpsilon(), 0.0);
assertEquals(0.456, conf.getConf(2).getLayer().getEpsilon(), 0.0);
net = new MultiLayerNetwork(conf);
net.init();
updater = (MultiLayerUpdater) net.getUpdater();
updaters = updater.getLayerUpdaters();
LayerUpdater u0_2 = (LayerUpdater) updaters[0];
AdaDelta adaDelta = (AdaDelta) u0_2.updaterForVariable.get("W");
assertEquals(1e-6, adaDelta.getEpsilon(), 0.0);
LayerUpdater u1_2 = (LayerUpdater) updaters[1];
AdaDelta adaDelta1 = (AdaDelta) u1_2.updaterForVariable.get("W");
assertEquals(0.123, adaDelta1.getEpsilon(), 0.0);
LayerUpdater u2_2 = (LayerUpdater) updaters[2];
AdaDelta adaDelta2 = (AdaDelta) u2_2.updaterForVariable.get("W");
assertEquals(0.456, adaDelta2.getEpsilon(), 0.0);
}
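The epsilon configured here is the numerical-stability constant used in the AdaGrad/AdaDelta denominators; it can be set once via the builder default or overridden per layer. A minimal sketch of an override plus read-back, using the same API as the test above (the value 0.5 is arbitrary):
MultiLayerConfiguration c = new NeuralNetConfiguration.Builder().updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).list()
        .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())                  // keeps the default epsilon
        .layer(1, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.5).build())    // per-layer override
        .build();
MultiLayerNetwork n = new MultiLayerNetwork(c);
n.init();
LayerUpdater lu = (LayerUpdater) ((MultiLayerUpdater) n.getUpdater()).getLayerUpdaters()[1];
assertEquals(0.5, ((AdaGrad) lu.updaterForVariable.get("W")).getEpsilon(), 0.0);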
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class GradientCheckUtil, method checkGradientsPretrainLayer.
/**
* Check backprop gradients for a pretrain layer
*
* NOTE: gradient checking pretrain layers can be difficult...
*/
public static boolean checkGradientsPretrainLayer(Layer layer, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray input, int rngSeed) {
//Basic sanity checks on input:
if (epsilon <= 0.0 || epsilon > 0.1)
throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
if (maxRelError <= 0.0 || maxRelError > 0.25)
throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
//Check network configuration:
int layerCount = 0;
layer.setInput(input);
Nd4j.getRandom().setSeed(rngSeed);
layer.computeGradientAndScore();
Pair<Gradient, Double> gradAndScore = layer.gradientAndScore();
Updater updater = UpdaterCreator.getUpdater(layer);
updater.update(layer, gradAndScore.getFirst(), 0, layer.batchSize());
//need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
//need dup: params are a *view* of full parameters
INDArray originalParams = layer.params().dup();
int nParams = originalParams.length();
Map<String, INDArray> paramTable = layer.paramTable();
List<String> paramNames = new ArrayList<>(paramTable.keySet());
int[] paramEnds = new int[paramNames.size()];
paramEnds[0] = paramTable.get(paramNames.get(0)).length();
for (int i = 1; i < paramEnds.length; i++) {
paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
}
int totalNFailures = 0;
double maxError = 0.0;
int currParamNameIdx = 0;
//Assumption here: params is a view that we can modify in-place
INDArray params = layer.params();
for (int i = 0; i < nParams; i++) {
//Get param name
if (i >= paramEnds[currParamNameIdx]) {
currParamNameIdx++;
}
String paramName = paramNames.get(currParamNameIdx);
//(w+epsilon): Do forward pass and score
double origValue = params.getDouble(i);
params.putScalar(i, origValue + epsilon);
//TODO add a 'score' method that doesn't calculate gradients...
Nd4j.getRandom().setSeed(rngSeed);
layer.computeGradientAndScore();
double scorePlus = layer.score();
//(w-epsilon): Do forward pass and score
params.putScalar(i, origValue - epsilon);
Nd4j.getRandom().setSeed(rngSeed);
layer.computeGradientAndScore();
double scoreMinus = layer.score();
//Reset original param value
params.putScalar(i, origValue);
//Calculate numerical parameter gradient:
double scoreDelta = scorePlus - scoreMinus;
double numericalGradient = scoreDelta / (2 * epsilon);
if (Double.isNaN(numericalGradient))
throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
double backpropGradient = gradientToCheck.getDouble(i);
//http://cs231n.github.io/neural-networks-3/#gradcheck
//use mean centered
double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
if (backpropGradient == 0.0 && numericalGradient == 0.0)
//Edge case: i.e., RNNs with time series length of 1
relError = 0.0;
if (relError > maxError)
maxError = relError;
if (relError > maxRelError || Double.isNaN(relError)) {
double absError = Math.abs(backpropGradient - numericalGradient);
if (absError < minAbsoluteError) {
log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
} else {
if (print)
log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
if (exitOnFirstError)
return false;
totalNFailures++;
}
} else if (print) {
log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
}
}
if (print) {
int nPass = nParams - totalNFailures;
log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
}
return totalNFailures == 0;
}
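A sketch of how this check might be invoked on a single pretrain layer; the threshold values below are illustrative choices consistent with the sanity checks at the top of the method, and rbmLayer / features are hypothetical:
boolean ok = GradientCheckUtil.checkGradientsPretrainLayer(
        rbmLayer,   // pretrain layer to check, e.g. an RBM instantiated as in the tests above
        1e-4,       // epsilon for the central difference (w +/- epsilon)
        1e-3,       // maximum allowed relative error
        1e-8,       // absolute errors below this pass even if the relative error is large
        true,       // print per-parameter results
        false,      // keep going after the first failure
        features,   // input activations for the layer
        12345);     // fixed RNG seed so both perturbed forward passes see the same randomness
assertTrue(ok);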
Use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.
The class LossLayer, method fit.
/**
* Fit the model
*
* @param input the examples to classify (one example in each row)
* @param labels the example labels (a binary outcome matrix)
*/
@Override
public void fit(INDArray input, INDArray labels) {
setInput(input);
setLabels(labels);
applyDropOutIfNecessary(true);
if (solver == null) {
solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build();
//Set the updater state view array. For MLN and CG, this is done by MultiLayerUpdater and ComputationGraphUpdater respectively
Updater updater = solver.getOptimizer().getUpdater();
int updaterStateSize = updater.stateSizeForLayer(this);
if (updaterStateSize > 0)
updater.setStateViewArray(this, Nd4j.createUninitialized(new int[] { 1, updaterStateSize }, Nd4j.order()), true);
}
solver.optimize();
}
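For context, a minimal sketch of calling the fit(INDArray, INDArray) overload above on a standalone layer (lossLayer is hypothetical; inside a MultiLayerNetwork or ComputationGraph the updater state view is instead managed by MultiLayerUpdater / ComputationGraphUpdater, as the comment in the method notes):
INDArray features = Nd4j.rand(10, 4);    // 10 examples, one per row
INDArray labels = Nd4j.zeros(10, 4);     // binary outcome matrix matching the layer output
lossLayer.fit(features, labels);         // first call builds the Solver and sizes the updater state view, then optimizes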