Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.
From the class MultiLayerUpdater, method update():
@Override
public void update(Layer layer, Gradient gradient, int iteration, int batchSize) {
    MultiLayerNetwork mln = (MultiLayerNetwork) layer;

    Gradient[] layerGradients = new Gradient[layerUpdaters.length];
    for (int i = 0; i < layerGradients.length; i++)
        layerGradients[i] = new DefaultGradient();

    for (Map.Entry<String, INDArray> gradientPair : gradient.gradientForVariable().entrySet()) {
        String key = gradientPair.getKey();
        int idx = key.indexOf('_');
        if (idx == -1)
            throw new IllegalStateException(
                            "Invalid key: MuliLayerNetwork Gradient key does not have layer separator: \"" + key + "\"");
        int layerIdx = Integer.parseInt(key.substring(0, idx));
        String newKey = key.substring(idx + 1);
        layerGradients[layerIdx].gradientForVariable().put(newKey, gradientPair.getValue());
    }

    for (int i = 0; i < layerUpdaters.length; i++) {
        layerUpdaters[i].update(mln.getLayer(i), layerGradients[i], iteration, batchSize);
    }
}
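The method above relies on a naming convention: every gradient variable of a MultiLayerNetwork is keyed as "<layerIndex>_<parameterName>" (for example "0_W"). The following is a minimal, hypothetical sketch (not part of the project) that builds such a Gradient with DefaultGradient and splits the keys the same way update() does; the class name and layer sizes are made up for illustration.

import java.util.Arrays;

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.nd4j.linalg.factory.Nd4j;

public class GradientKeySketch {
    public static void main(String[] args) {
        // Keys follow the "<layerIndex>_<parameterName>" convention expected by MultiLayerUpdater
        Gradient g = new DefaultGradient();
        g.setGradientFor("0_" + DefaultParamInitializer.WEIGHT_KEY, Nd4j.ones(4, 3));
        g.setGradientFor("0_" + DefaultParamInitializer.BIAS_KEY, Nd4j.ones(1, 3));
        g.setGradientFor("1_" + DefaultParamInitializer.WEIGHT_KEY, Nd4j.ones(3, 2));
        g.setGradientFor("1_" + DefaultParamInitializer.BIAS_KEY, Nd4j.ones(1, 2));

        // Split each key into layer index and per-layer parameter name, as update() does
        for (String key : g.gradientForVariable().keySet()) {
            int idx = key.indexOf('_');
            int layerIdx = Integer.parseInt(key.substring(0, idx));
            String paramName = key.substring(idx + 1);
            System.out.println("layer " + layerIdx + ", param " + paramName + ", shape "
                            + Arrays.toString(g.getGradientFor(key).shape()));
        }
    }
}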
Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.
From the class ComputationGraph, method calcBackpropGradients():
/**
 * Do backprop (gradient calculation)
 *
 * @param truncatedBPTT    false: normal backprop. true: calculate gradients using truncated BPTT for RNN layers
 * @param externalEpsilons null usually (for typical supervised learning). If not null (and length > 0) then assume that
 *                         the user has provided some errors externally, as they would do for example in reinforcement
 *                         learning situations.
 */
protected void calcBackpropGradients(boolean truncatedBPTT, INDArray... externalEpsilons) {
    if (flattenedGradients == null)
        initGradientsView();

    LinkedList<Triple<String, INDArray, Character>> gradients = new LinkedList<>();

    //Do backprop according to the reverse of the topological ordering of the network
    //If true: already set epsilon for this vertex; later epsilons should be *added* to the existing one, not set
    boolean[] setVertexEpsilon = new boolean[topologicalOrder.length];
    for (int i = topologicalOrder.length - 1; i >= 0; i--) {
        GraphVertex current = vertices[topologicalOrder[i]];

        if (current.isInputVertex())
            continue; //No op

        //FIXME: make the frozen vertex feature extraction more flexible
        if (current.hasLayer() && current.getLayer() instanceof FrozenLayer)
            break;

        if (current.isOutputVertex()) {
            //Two reasons for a vertex to be an output vertex:
            //(a) it's an output layer (i.e., instanceof IOutputLayer), or
            //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example
            int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName());
            if (current.getLayer() instanceof IOutputLayer) {
                IOutputLayer outputLayer = (IOutputLayer) current.getLayer();
                INDArray currLabels = labels[thisOutputNumber];
                outputLayer.setLabels(currLabels);
            } else {
                current.setEpsilon(externalEpsilons[thisOutputNumber]);
                setVertexEpsilon[topologicalOrder[i]] = true;
            }
        }

        Pair<Gradient, INDArray[]> pair = current.doBackward(truncatedBPTT);
        INDArray[] epsilons = pair.getSecond();

        //Inputs to the current GraphVertex:
        VertexIndices[] inputVertices = current.getInputVertices();

        //Set epsilons for the vertices that provide inputs to this vertex:
        if (inputVertices != null) {
            int j = 0;
            for (VertexIndices v : inputVertices) {
                GraphVertex gv = vertices[v.getVertexIndex()];
                if (setVertexEpsilon[gv.getVertexIndex()]) {
                    //This vertex: must output to multiple vertices... we want to add the epsilons here
                    INDArray currentEps = gv.getEpsilon();
                    //TODO: in some circumstances, it may be safe to do in-place add (but not always)
                    gv.setEpsilon(currentEps.add(epsilons[j++]));
                } else {
                    gv.setEpsilon(epsilons[j++]);
                }
                setVertexEpsilon[gv.getVertexIndex()] = true;
            }
        }

        if (pair.getFirst() != null) {
            Gradient g = pair.getFirst();
            Map<String, INDArray> map = g.gradientForVariable();
            LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
            for (Map.Entry<String, INDArray> entry : map.entrySet()) {
                String origName = entry.getKey();
                String newName = current.getVertexName() + "_" + origName;
                tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
            }
            for (Triple<String, INDArray, Character> t : tempList)
                gradients.addFirst(t);
        }
    }

    //Now, add the gradients in the order we need them in for flattening (same as params order)
    Gradient gradient = new DefaultGradient(flattenedGradients);
    for (Triple<String, INDArray, Character> t : gradients) {
        gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
    }

    this.gradient = gradient;
}
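At the end of the method, every per-variable gradient is registered on a single DefaultGradient backed by the pre-allocated flattenedGradients view, together with the flattening order of each variable. Below is a minimal, hypothetical sketch of that pattern; the backing array size, the "layer0_" key prefix, and the 'f' order are illustrative values, and unlike the real method the per-variable arrays here are not views into the backing array.

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class FlattenedGradientSketch {
    public static void main(String[] args) {
        // Backing view: one row vector with space for a 4x3 weight and a 1x3 bias (15 values)
        INDArray flattenedView = Nd4j.create(1, 15);

        Gradient gradient = new DefaultGradient(flattenedView);
        // Register per-variable gradients along with their flattening order ('f' = column-major)
        gradient.setGradientFor("layer0_W", Nd4j.ones(4, 3), 'f');
        gradient.setGradientFor("layer0_b", Nd4j.ones(1, 3), 'f');

        // The flattening order is recorded per variable...
        Character order = gradient.flatteningOrderForVariable("layer0_W");
        // ...and gradient() exposes the single flattened view backing the whole Gradient
        INDArray flat = gradient.gradient();
        System.out.println("order=" + order + ", flattened length=" + flat.length());
    }
}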
Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.
From the class TestDecayPolicies, method testLearningRateSigmoidDecaySingleLayer():
@Test
public void testLearningRateSigmoidDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double steps = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.Sigmoid).lrPolicyDecayRate(decayRate)
                    .lrPolicySteps(steps).iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcSigmoidDecay(layer.conf().getLearningRateByParam("W"), decayRate, i, steps);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
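The calcSigmoidDecay(...) helper is not included in this excerpt. Assuming it mirrors DL4J's Sigmoid learning-rate policy, lr / (1 + e^(-decayRate * (iteration - steps))), a plausible implementation would be:

// Assumed implementation of the helper used above; verify against the project's
// LearningRatePolicy.Sigmoid handling before relying on the exact formula.
public double calcSigmoidDecay(double lr, double decayRate, double iteration, double steps) {
    return lr / (1 + Math.exp(-decayRate * (iteration - steps)));
}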
Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.
From the class TestDecayPolicies, method testMomentumScheduleMLN():
@Test
public void testMomentumScheduleMLN() {
    double lr = 1e-2;
    double mu = 0.6;
    Map<Integer, Double> momentumAfter = new HashMap<>();
    momentumAfter.put(1, 0.2);
    int iterations = 2;
    int[] nIns = { 4, 2 };
    int[] nOuts = { 2, 3 };

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu)
                    .momentumAfter(momentumAfter).iterations(iterations).list()
                    .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
                                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1])
                                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .backprop(true).pretrain(false).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    Updater updater = UpdaterCreator.getUpdater(net);
    int stateSize = updater.stateSizeForLayer(net);
    updater.setStateViewArray(net, Nd4j.create(1, stateSize), true);

    String wKey, bKey;

    Gradient gradientExpected = new DefaultGradient();
    for (int k = 0; k < net.getnLayers(); k++) {
        wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
        gradientExpected.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
        bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
        gradientExpected.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
    }

    Gradient gradientMLN = new DefaultGradient();
    for (int j = 0; j < 2; j++) {
        wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY;
        gradientMLN.setGradientFor(wKey, Nd4j.ones(nIns[j], nOuts[j]));
        bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY;
        gradientMLN.setGradientFor(bKey, Nd4j.ones(1, nOuts[j]));
    }

    for (int i = 0; i < 2; i++) {
        updater.update(net, gradientMLN, i, 1);
        mu = testNesterovsComputation(gradientMLN, gradientExpected, lr, mu, momentumAfter, i);
        assertEquals(mu, net.getLayer(1).conf().getLayer().getMomentum(), 1e-4);
    }
}
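The testNesterovsComputation(...) helper is not shown here. The sketch below (hypothetical, not the project's code) illustrates the Nesterov momentum update it presumably checks, including the momentumAfter schedule that switches the momentum from 0.6 to 0.2 at iteration 1; the class name, parameter shape, and gradient values are illustrative only.

import java.util.Collections;
import java.util.Map;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class NesterovsScheduleSketch {
    public static void main(String[] args) {
        double lr = 1e-2;
        double mu = 0.6;
        Map<Integer, Double> momentumAfter = Collections.singletonMap(1, 0.2);

        INDArray params = Nd4j.zeros(4, 2);   // one parameter array (illustrative shape)
        INDArray v = Nd4j.zeros(4, 2);        // updater state (velocity), same shape as params

        for (int i = 0; i < 2; i++) {
            if (momentumAfter.containsKey(i))
                mu = momentumAfter.get(i);    // momentum schedule: switch value at iteration 1

            INDArray g = Nd4j.ones(4, 2);     // gradient for this iteration (all ones, as in the test)
            INDArray vPrev = v.dup();
            v = vPrev.mul(mu).subi(g.mul(lr));                       // v = mu * vPrev - lr * g
            INDArray update = vPrev.mul(-mu).addi(v.mul(1 + mu));    // -mu * vPrev + (1 + mu) * v
            params.addi(update);

            System.out.println("iteration " + i + ": momentum = " + mu);
        }
    }
}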
Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.
From the class TestDecayPolicies, method testLearningRateExponentialDecaySingleLayer():
@Test
public void testLearningRateExponentialDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.Exponential).lrPolicyDecayRate(decayRate)
                    .iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcExponentialDecay(lr, decayRate, i);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
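As with the sigmoid test, the calcExponentialDecay(...) helper is not included in this excerpt. Assuming it matches DL4J's Exponential learning-rate policy, lr * decayRate^iteration, a plausible implementation is:

// Assumed implementation; verify against the project's LearningRatePolicy.Exponential handling.
public double calcExponentialDecay(double lr, double decayRate, double iteration) {
    return lr * Math.pow(decayRate, iteration);
}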