Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
The class ComputationGraph, method calcBackpropGradients.
/**
 * Do backprop (gradient calculation)
 *
 * @param truncatedBPTT    false: normal backprop. true: calculate gradients using truncated BPTT for RNN layers
 * @param externalEpsilons null usually (for typical supervised learning). If not null (and length > 0) then assume
 *                         that the user has provided some errors externally, as they would do for example in
 *                         reinforcement learning situations.
 */
protected void calcBackpropGradients(boolean truncatedBPTT, INDArray... externalEpsilons) {
    if (flattenedGradients == null)
        initGradientsView();
    LinkedList<Triple<String, INDArray, Character>> gradients = new LinkedList<>();

    //Do backprop according to the reverse of the topological ordering of the network
    //If true: already set epsilon for this vertex; later epsilons should be *added* to the existing one, not set
    boolean[] setVertexEpsilon = new boolean[topologicalOrder.length];
    for (int i = topologicalOrder.length - 1; i >= 0; i--) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex())
            continue; //No op
        //FIXME: make the frozen vertex feature extraction more flexible
        if (current.hasLayer() && current.getLayer() instanceof FrozenLayer)
            break;

        if (current.isOutputVertex()) {
            //Two reasons for a vertex to be an output vertex:
            //(a) it's an output layer (i.e., instanceof IOutputLayer), or
            //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example
            int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName());
            if (current.getLayer() instanceof IOutputLayer) {
                IOutputLayer outputLayer = (IOutputLayer) current.getLayer();
                INDArray currLabels = labels[thisOutputNumber];
                outputLayer.setLabels(currLabels);
            } else {
                current.setEpsilon(externalEpsilons[thisOutputNumber]);
                setVertexEpsilon[topologicalOrder[i]] = true;
            }
        }

        Pair<Gradient, INDArray[]> pair = current.doBackward(truncatedBPTT);
        INDArray[] epsilons = pair.getSecond();

        //Inputs to the current GraphVertex:
        VertexIndices[] inputVertices = current.getInputVertices();

        //Set epsilons for the vertices that provide inputs to this vertex:
        if (inputVertices != null) {
            int j = 0;
            for (VertexIndices v : inputVertices) {
                GraphVertex gv = vertices[v.getVertexIndex()];
                if (setVertexEpsilon[gv.getVertexIndex()]) {
                    //This vertex: must output to multiple vertices... we want to add the epsilons here
                    INDArray currentEps = gv.getEpsilon();
                    //TODO: in some circumstances, it may be safe to do in-place add (but not always)
                    gv.setEpsilon(currentEps.add(epsilons[j++]));
                } else {
                    gv.setEpsilon(epsilons[j++]);
                }
                setVertexEpsilon[gv.getVertexIndex()] = true;
            }
        }

        if (pair.getFirst() != null) {
            Gradient g = pair.getFirst();
            Map<String, INDArray> map = g.gradientForVariable();
            LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
            for (Map.Entry<String, INDArray> entry : map.entrySet()) {
                String origName = entry.getKey();
                String newName = current.getVertexName() + "_" + origName;
                tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
            }
            for (Triple<String, INDArray, Character> t : tempList)
                gradients.addFirst(t);
        }
    }

    //Now, add the gradients in the order we need them in for flattening (same as params order)
    Gradient gradient = new DefaultGradient(flattenedGradients);
    for (Triple<String, INDArray, Character> t : gradients) {
        gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
    }
    this.gradient = gradient;
}
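For orientation, a minimal sketch of how the externalEpsilons path can be driven from user code. It assumes the public ComputationGraph.backpropGradient(INDArray...) entry point that deeplearning4j exposes for external-error (e.g. reinforcement learning) workflows; the toy graph, names, and shapes below are illustrative, not taken from this source.

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ExternalErrorsSketch {
    public static void main(String[] args) {
        int nIn = 4, nOut = 3, minibatch = 2;
        //A one-layer graph; "dense" is not an IOutputLayer, but marking it as the network
        //output means calcBackpropGradients takes the external-epsilon branch (case (b) above).
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder()
                .addInputs("in")
                .addLayer("dense", new DenseLayer.Builder().nIn(nIn).nOut(nOut).build(), "in")
                .setOutputs("dense")
                .build();
        ComputationGraph graph = new ComputationGraph(conf);
        graph.init();

        INDArray features = Nd4j.rand(minibatch, nIn);
        graph.feedForward(new INDArray[] { features }, true); //forward pass, caching activations

        //Error signal dL/dOutput computed outside the graph (e.g., by an RL algorithm):
        INDArray externalError = Nd4j.rand(minibatch, nOut);

        //Routes internally through calcBackpropGradients(false, externalError):
        Gradient g = graph.backpropGradient(externalError);
        System.out.println(g.gradientForVariable().keySet());
    }
}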
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
The class GravesBidirectionalLSTMTest, method testSimpleForwardsAndBackwardsActivation.
@Test
public void testSimpleForwardsAndBackwardsActivation() {
    final int nIn = 2;
    final int layerSize = 3;
    final int miniBatchSize = 1;
    final int timeSeriesLength = 5;
    Nd4j.getRandom().setSeed(12345);

    final NeuralNetConfiguration confBidirectional = new NeuralNetConfiguration.Builder()
            .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                    .nIn(nIn).nOut(layerSize)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.1, 0.1))
                    .activation(Activation.TANH).updater(Updater.NONE).build())
            .build();
    final NeuralNetConfiguration confForwards = new NeuralNetConfiguration.Builder()
            .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(layerSize)
                    .weightInit(WeightInit.ZERO)
                    .activation(Activation.TANH).build())
            .build();

    int numParams = confForwards.getLayer().initializer().numParams(confForwards);
    INDArray params = Nd4j.create(1, numParams);
    int numParamsBD = confBidirectional.getLayer().initializer().numParams(confBidirectional);
    INDArray paramsBD = Nd4j.create(1, numParamsBD);

    final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getLayer()
            .instantiate(confBidirectional, null, 0, paramsBD, true);
    final GravesLSTM forwardsLSTM =
            (GravesLSTM) confForwards.getLayer().instantiate(confForwards, null, 0, params, true);

    bidirectionalLSTM.setBackpropGradientsViewArray(
            Nd4j.create(1, confBidirectional.getLayer().initializer().numParams(confBidirectional)));
    forwardsLSTM.setBackpropGradientsViewArray(
            Nd4j.create(1, confForwards.getLayer().initializer().numParams(confForwards)));

    final INDArray sig = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    final INDArray sigb = sig.dup();
    reverseColumnsInPlace(sigb.slice(0));

    final INDArray recurrentWeightsF = bidirectionalLSTM
            .getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS);
    final INDArray inputWeightsF = bidirectionalLSTM
            .getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS);
    final INDArray biasWeightsF = bidirectionalLSTM
            .getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS);
    final INDArray recurrentWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    final INDArray inputWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    final INDArray biasWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.BIAS_KEY);

    //assert that the forwards part of the bidirectional layer is equal to that of the regular LSTM
    assertArrayEquals(recurrentWeightsF2.shape(), recurrentWeightsF.shape());
    assertArrayEquals(inputWeightsF2.shape(), inputWeightsF.shape());
    assertArrayEquals(biasWeightsF2.shape(), biasWeightsF.shape());

    //copy forwards weights to make the forwards activations do the same thing
    forwardsLSTM.setParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, recurrentWeightsF);
    forwardsLSTM.setParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, inputWeightsF);
    forwardsLSTM.setParam(GravesLSTMParamInitializer.BIAS_KEY, biasWeightsF);

    final INDArray recurrentWeightsB = bidirectionalLSTM
            .getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    final INDArray inputWeightsB = bidirectionalLSTM
            .getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    final INDArray biasWeightsB = bidirectionalLSTM
            .getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);

    //assert that the forwards and backwards are the same shapes
    assertArrayEquals(recurrentWeightsF.shape(), recurrentWeightsB.shape());
    assertArrayEquals(inputWeightsF.shape(), inputWeightsB.shape());
    assertArrayEquals(biasWeightsF.shape(), biasWeightsB.shape());

    //zero out backwards layer
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS,
            Nd4j.zeros(recurrentWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS,
            Nd4j.zeros(inputWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS,
            Nd4j.zeros(biasWeightsB.shape()));

    forwardsLSTM.setInput(sig);

    //compare activations
    final INDArray activation1 = forwardsLSTM.activate(sig).slice(0);
    final INDArray activation2 = bidirectionalLSTM.activate(sig).slice(0);
    assertArrayEquals(activation1.data().asFloat(), activation2.data().asFloat(), 1e-5f);

    final INDArray randSig = Nd4j.rand(new int[] { 1, layerSize, timeSeriesLength });
    final INDArray randSigBackwards = randSig.dup();
    reverseColumnsInPlace(randSigBackwards.slice(0));

    final Pair<Gradient, INDArray> backprop1 = forwardsLSTM.backpropGradient(randSig);
    final Pair<Gradient, INDArray> backprop2 = bidirectionalLSTM.backpropGradient(randSig);

    //compare gradients
    assertArrayEquals(
            backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY)
                    .dup().data().asFloat(),
            backprop2.getFirst()
                    .getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS)
                    .dup().data().asFloat(),
            1e-5f);
    assertArrayEquals(
            backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY)
                    .dup().data().asFloat(),
            backprop2.getFirst()
                    .getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS)
                    .dup().data().asFloat(),
            1e-5f);
    assertArrayEquals(
            backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.BIAS_KEY)
                    .dup().data().asFloat(),
            backprop2.getFirst()
                    .getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS)
                    .dup().data().asFloat(),
            1e-5f);

    //copy forwards to backwards
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS,
            bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS,
            bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS,
            bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS));

    //zero out forwards layer
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS,
            Nd4j.zeros(recurrentWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS,
            Nd4j.zeros(inputWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS,
            Nd4j.zeros(biasWeightsB.shape()));

    //run on reversed signal
    final INDArray activation3 = bidirectionalLSTM.activate(sigb).slice(0);
    final INDArray activation3Reverse = activation3.dup();
    reverseColumnsInPlace(activation3Reverse);
    assertEquals(activation3Reverse, activation1);
    assertArrayEquals(activation3Reverse.shape(), activation1.shape());

    //test backprop now
    final INDArray refBackGradientReccurrent =
            backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    final INDArray refBackGradientInput =
            backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    final INDArray refBackGradientBias =
            backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.BIAS_KEY);

    //reverse weights only with backwards signal should yield same result as forwards weights with forwards signal
    final Pair<Gradient, INDArray> backprop3 = bidirectionalLSTM.backpropGradient(randSigBackwards);
    final INDArray backGradientRecurrent = backprop3.getFirst()
            .getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    final INDArray backGradientInput = backprop3.getFirst()
            .getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    final INDArray backGradientBias = backprop3.getFirst()
            .getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);

    assertArrayEquals(refBackGradientBias.dup().data().asDouble(), backGradientBias.dup().data().asDouble(), 1e-6);
    assertArrayEquals(refBackGradientInput.dup().data().asDouble(), backGradientInput.dup().data().asDouble(), 1e-6);
    assertArrayEquals(refBackGradientReccurrent.dup().data().asDouble(),
            backGradientRecurrent.dup().data().asDouble(), 1e-6);

    final INDArray refEpsilon = backprop1.getSecond().dup();
    final INDArray backEpsilon = backprop3.getSecond().dup();
    reverseColumnsInPlace(refEpsilon.slice(0));
    assertArrayEquals(backEpsilon.dup().data().asDouble(), refEpsilon.dup().data().asDouble(), 1e-6);
}
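The test leans on a reverseColumnsInPlace helper that this excerpt does not include. A minimal sketch of what it presumably does, assuming it flips the column (time-step) order of a 2D [features, timeSeriesLength] array in place:

//Hedged reconstruction of the helper used above (not shown in this excerpt):
//swaps column i with column (cols - 1 - i), reversing the time axis in place.
private static void reverseColumnsInPlace(INDArray x) {
    int cols = x.columns();
    for (int i = 0; i < cols / 2; i++) {
        INDArray left = x.getColumn(i).dup(); //copy before it is overwritten
        x.putColumn(i, x.getColumn(cols - 1 - i));
        x.putColumn(cols - 1 - i, left);
    }
}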
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
The class GravesBidirectionalLSTMTest, method testGravesBackwardBasicHelper.
private static void testGravesBackwardBasicHelper(int nIn, int nOut, int lstmNHiddenUnits, int miniBatchSize,
        int timeSeriesLength) {
    INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength);

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                    .nIn(nIn).nOut(lstmNHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                    .activation(Activation.TANH).build())
            .build();

    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    GravesBidirectionalLSTM lstm =
            (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
    lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getLayer().initializer().numParams(conf)));

    //Set input, do a forward pass:
    lstm.activate(inputData);
    assertNotNull(lstm.input());

    INDArray epsilon = Nd4j.ones(miniBatchSize, lstmNHiddenUnits, timeSeriesLength);
    Pair<Gradient, INDArray> out = lstm.backpropGradient(epsilon);
    Gradient outGradient = out.getFirst();
    INDArray nextEpsilon = out.getSecond();

    INDArray biasGradientF = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS);
    INDArray inWeightGradientF =
            outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS);
    INDArray recurrentWeightGradientF =
            outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS);
    assertNotNull(biasGradientF);
    assertNotNull(inWeightGradientF);
    assertNotNull(recurrentWeightGradientF);

    INDArray biasGradientB = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);
    INDArray inWeightGradientB =
            outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    INDArray recurrentWeightGradientB =
            outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    assertNotNull(biasGradientB);
    assertNotNull(inWeightGradientB);
    assertNotNull(recurrentWeightGradientB);

    assertArrayEquals(biasGradientF.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
    assertArrayEquals(inWeightGradientF.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
    assertArrayEquals(recurrentWeightGradientF.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });
    assertArrayEquals(biasGradientB.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
    assertArrayEquals(inWeightGradientB.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
    assertArrayEquals(recurrentWeightGradientB.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });

    assertNotNull(nextEpsilon);
    assertArrayEquals(nextEpsilon.shape(), new int[] { miniBatchSize, nIn, timeSeriesLength });

    //Check update:
    for (String s : outGradient.gradientForVariable().keySet()) {
        lstm.update(outGradient.getGradientFor(s), s);
    }
}
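In the full test class this helper is swept over several shape combinations, including the miniBatchSize = 1 and timeSeriesLength = 1 edge cases. The exact sizes are not shown in this excerpt, so the driver below is hypothetical (note that the nOut argument is never read by the helper's assertions):

@Test
public void testGravesBackwardBasic() {
    //Hypothetical sizes: 5 inputs, 4 hidden LSTM units
    testGravesBackwardBasicHelper(5, 3, 4, 10, 7);
    testGravesBackwardBasicHelper(5, 3, 4, 1, 7); //Edge case: miniBatchSize = 1
    testGravesBackwardBasicHelper(5, 3, 4, 10, 1); //Edge case: timeSeriesLength = 1
}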
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
The class RBMTests, method testGradient.
@Test
public void testGradient() {
    float[][] data = new float[][] { { 1, 1, 1, 0, 0, 0 }, { 1, 0, 1, 0, 0, 0 }, { 1, 1, 1, 0, 0, 0 },
            { 0, 0, 1, 1, 1, 0 }, { 0, 0, 1, 1, 0, 0 }, { 0, 0, 1, 1, 1, 0 }, { 0, 0, 1, 1, 1, 0 } };
    INDArray input = Nd4j.create(data);
    INDArray params = Nd4j.create(1, 6 * 4 + 6 + 4);
    RBM rbm = getRBMLayer(6, 4, HiddenUnit.BINARY, VisibleUnit.BINARY, params, true, false, 1,
            LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD);
    rbm.fit(input);
    double value = rbm.score();
    Gradient grad2 = rbm.gradient();
    assertEquals(24, grad2.getGradientFor("W").length());
}
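getRBMLayer is a private factory in the test class that this excerpt omits. A plausible reconstruction, assuming it wires the unit types and loss function into an RBM layer configuration and instantiates the layer over the supplied parameter view; the seed and the remaining builder defaults are assumptions:

//Hedged reconstruction; the exact builder options of the real helper may differ.
private static RBM getRBMLayer(int nIn, int nOut, HiddenUnit hiddenUnit, VisibleUnit visibleUnit,
        INDArray params, boolean pretrain, boolean initialize, int iterations,
        LossFunctions.LossFunction lossFunction) {
    org.deeplearning4j.nn.conf.layers.RBM layerConf =
            new org.deeplearning4j.nn.conf.layers.RBM.Builder(hiddenUnit, visibleUnit)
                    .nIn(nIn).nOut(nOut).lossFunction(lossFunction).build();
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .iterations(iterations).seed(42) //seed is an assumption
            .layer(layerConf).build();
    conf.setPretrain(pretrain);
    return (RBM) conf.getLayer().instantiate(conf, null, 0, params, initialize);
}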
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
The class BatchNormalizationTest, method test2dVs4d.
@Test
public void test2dVs4d() {
    //Idea: 2d and 4d should be the same...
    Nd4j.getRandom().setSeed(12345);

    int m = 2;
    int h = 3;
    int w = 3;
    int nOut = 2;

    INDArray in = Nd4j.rand('c', m * h * w, nOut);
    INDArray in4 = in.dup();
    in4 = Shape.newShapeNoCopy(in4, new int[] { m, h, w, nOut }, false);
    assertNotNull(in4);
    in4 = in4.permute(0, 3, 1, 2).dup();
    INDArray arr = Nd4j.rand(1, m * h * w * nOut).reshape('f', h, w, m, nOut).permute(2, 3, 1, 0);
    in4 = arr.assign(in4);

    Layer l1 = getLayer(nOut);
    Layer l2 = getLayer(nOut);

    INDArray out2d = l1.activate(in.dup(), true);
    INDArray out4d = l2.activate(in4.dup(), true);

    INDArray out4dAs2 = out4d.permute(0, 2, 3, 1).dup('c');
    out4dAs2 = Shape.newShapeNoCopy(out4dAs2, new int[] { m * h * w, nOut }, false);
    assertEquals(out2d, out4dAs2);

    //Test backprop:
    INDArray epsilons2d = Nd4j.rand('c', m * h * w, nOut);
    INDArray epsilons4d = epsilons2d.dup();
    epsilons4d = Shape.newShapeNoCopy(epsilons4d, new int[] { m, h, w, nOut }, false);
    assertNotNull(epsilons4d);
    epsilons4d = epsilons4d.permute(0, 3, 1, 2).dup();

    Pair<Gradient, INDArray> b2d = l1.backpropGradient(epsilons2d);
    Pair<Gradient, INDArray> b4d = l2.backpropGradient(epsilons4d);

    INDArray e4dAs2d = b4d.getSecond().permute(0, 2, 3, 1).dup('c');
    e4dAs2d = Shape.newShapeNoCopy(e4dAs2d, new int[] { m * h * w, nOut }, false);
    assertEquals(b2d.getSecond(), e4dAs2d);
}
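getLayer(nOut) is another helper the excerpt omits. A minimal sketch, assuming it simply instantiates a BatchNormalization layer over nOut channels with default epsilon and decay:

//Hedged reconstruction of the test's layer factory (not shown in this excerpt).
protected static Layer getLayer(int nOut) {
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new BatchNormalization.Builder().nOut(nOut).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = numParams > 0 ? Nd4j.create(1, numParams) : null;
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    if (numParams > 0) {
        layer.setBackpropGradientsViewArray(Nd4j.create(1, numParams));
    }
    return layer;
}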