use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
the class MultiLayerNetwork method calcBackpropGradients.
/** Calculate gradients and errors. Used in two places:
* (a) backprop (for standard multi layer network learning)
* (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer)
* @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true
* @param withOutputLayer if true: assume last layer is output layer, and calculate errors based on labels. In this
* case, the epsilon input is not used (may/should be null).
* If false: calculate backprop gradients
* @return Gradients and the error (epsilon) at the input
*/
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer) {
if (flattenedGradients == null)
initGradientsView();
String multiGradientKey;
Gradient gradient = new DefaultGradient(flattenedGradients);
Layer currLayer;
//calculate and apply the backward gradient for every layer
/*
 * Skip the output layer in the indexing and just loop backwards, updating the coefficients for each layer
 * (when withOutputLayer == true).
 *
 * Activate applies the activation function for each layer and sets that as the input for the following layer.
 *
 * Typical literature presents the most trivial case of the error calculation: w^T * error.
 * This implementation transposes a few terms to handle mini-batches, because of the rows-vs-columns
 * organization ND4J uses for params.
 */
int numLayers = getnLayers();
//Store gradients in a list; used to ensure iteration order in the DefaultGradient linked hash map, i.e., layer 0 first instead of the output layer first
LinkedList<Triple<String, INDArray, Character>> gradientList = new LinkedList<>();
int layerFrom;
Pair<Gradient, INDArray> currPair;
if (withOutputLayer) {
if (!(getOutputLayer() instanceof IOutputLayer)) {
log.warn("Warning: final layer isn't output layer. You cannot use backprop without an output layer.");
return null;
}
IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
if (labels == null)
throw new IllegalStateException("No labels found");
outputLayer.setLabels(labels);
currPair = outputLayer.backpropGradient(null);
for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
String origName = entry.getKey();
multiGradientKey = String.valueOf(numLayers - 1) + "_" + origName;
gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
}
if (getLayerWiseConfigurations().getInputPreProcess(numLayers - 1) != null)
currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(numLayers - 1).backprop(currPair.getSecond(), getInputMiniBatchSize()));
layerFrom = numLayers - 2;
} else {
currPair = new Pair<>(null, epsilon);
layerFrom = numLayers - 1;
}
// Calculate gradients for the remaining layers, working backwards (the output layer, when present, was handled above)
for (int j = layerFrom; j >= 0; j--) {
currLayer = getLayer(j);
if (currLayer instanceof FrozenLayer)
break;
currPair = currLayer.backpropGradient(currPair.getSecond());
LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
String origName = entry.getKey();
multiGradientKey = String.valueOf(j) + "_" + origName;
tempList.addFirst(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
}
for (Triple<String, INDArray, Character> triple : tempList) gradientList.addFirst(triple);
//Pass epsilon through input processor before passing to next layer (if applicable)
if (getLayerWiseConfigurations().getInputPreProcess(j) != null)
currPair = new Pair<>(currPair.getFirst(), getLayerWiseConfigurations().getInputPreProcess(j).backprop(currPair.getSecond(), getInputMiniBatchSize()));
}
//Add gradients to Gradients (map), in correct order
for (Triple<String, INDArray, Character> triple : gradientList) {
gradient.setGradientFor(triple.getFirst(), triple.getSecond(), triple.getThird());
}
return new Pair<>(gradient, currPair.getSecond());
}
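A minimal usage sketch (not part of the file above, and assuming an already initialized MultiLayerNetwork net with an output layer, plus features/labels INDArrays): the protected method is driven through the public Model API, and the per-layer keys it builds, "<layerIndex>_<paramName>" such as "0_W" or "1_b", can then be read back from the resulting Gradient.
net.setInput(features);
net.setLabels(labels);
net.computeGradientAndScore(); //feed-forward, then calcBackpropGradients(null, true) internally (standard backprop case)
Gradient g = net.gradient(); //backed by the flattenedGradients view initialized above
for (Map.Entry<String, INDArray> e : g.gradientForVariable().entrySet()) {
    //iteration order is layer 0 first, as guaranteed by the gradientList handling above
    System.out.println(e.getKey() + " shape=" + java.util.Arrays.toString(e.getValue().shape()));
}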
use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
the class ComputationGraphUpdater method update.
/**
* Update the gradients for the given ComputationGraph
*/
public void update(ComputationGraph graph, Gradient gradient, int iteration, int batchSize) {
Map<String, Gradient> layerGradients = new HashMap<>();
for (Map.Entry<String, INDArray> gradientPair : gradient.gradientForVariable().entrySet()) {
String key = gradientPair.getKey();
int idx = key.lastIndexOf('_');
if (idx == -1)
throw new IllegalStateException("Invalid key: ComputationGraph Gradient key does not have layer separator: \"" + key + "\"");
String layerName = key.substring(0, idx);
Gradient g = layerGradients.get(layerName);
if (g == null) {
g = new DefaultGradient();
layerGradients.put(layerName, g);
}
String newKey = key.substring(idx + 1);
g.setGradientFor(newKey, gradientPair.getValue());
}
for (Map.Entry<String, Gradient> entry : layerGradients.entrySet()) {
String layerName = entry.getKey();
int updaterIdx = layerUpdatersMap.get(layerName);
layerUpdaters[updaterIdx].update(graph.getLayer(layerName), entry.getValue(), iteration, batchSize);
//Gradients may be replaced by BaseUpdater.update()
for (Map.Entry<String, INDArray> entry2 : layerGradients.get(layerName).gradientForVariable().entrySet()) {
gradient.setGradientFor(entry.getKey() + "_" + entry2.getKey(), entry2.getValue());
}
}
}
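The splitting above relies purely on a key naming convention: ComputationGraph gradient keys are "<layerName>_<paramName>", split on the last underscore so that layer names which themselves contain underscores still resolve correctly. A small illustration with a hypothetical layer name:
String key = "dense_layer_1_W"; //hypothetical composite key: layer "dense_layer_1", parameter "W"
int idx = key.lastIndexOf('_');
String layerName = key.substring(0, idx); //"dense_layer_1" -> used to look up the layer's updater index
String paramName = key.substring(idx + 1); //"W" -> becomes the per-layer gradient key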
use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
the class RBM method backpropGradient.
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
//If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
INDArray z = preOutput(input, true);
INDArray activationDerivative = propUpDerivative(z);
INDArray delta = epsilon.muli(activationDerivative);
if (maskArray != null) {
delta.muliColumnVector(maskArray);
}
Gradient ret = new DefaultGradient();
//weight gradient view, in 'f' (column-major) order
INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
Nd4j.gemm(input, delta, weightGrad, true, false, 1.0, 0.0);
INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
biasGrad.assign(delta.sum(0));
INDArray vBiasGradient = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);
ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
ret.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vBiasGradient);
INDArray epsilonNext = params.get(DefaultParamInitializer.WEIGHT_KEY).mmul(delta.transpose()).transpose();
return new Pair<>(ret, epsilonNext);
}
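For readability, the two products above can also be written as plain matrix multiplications (a sketch only; the real code deliberately writes into the pre-allocated gradient views via gemm instead of allocating new arrays). With input of shape [minibatch, nIn], delta of shape [minibatch, nOut] and W of shape [nIn, nOut]:
INDArray W = params.get(DefaultParamInitializer.WEIGHT_KEY);
INDArray weightGradCopy = input.transpose().mmul(delta); //dL/dW = input^T * delta, shape [nIn, nOut]
INDArray epsilonNextCopy = delta.mmul(W.transpose()); //error for the layer below = delta * W^T, shape [minibatch, nIn]; equal to the (W * delta^T)^T computed above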
use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
the class GlobalPoolingLayer method backpropGradient.
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
if (!collapseDimensions && epsilon.rank() != 2) {
int[] origShape = epsilon.shape();
//Don't collapse dims case: error should be [minibatch, vectorSize, 1] or [minibatch, depth, 1, 1]
//Reshape it to 2d, to get rid of the 1s
epsilon = epsilon.reshape(epsilon.ordering(), origShape[0], origShape[1]);
}
//Empty: no params
Gradient retGradient = new DefaultGradient();
int[] poolDim = null;
if (input.rank() == 3) {
if (poolingDimensions == null) {
//Use default pooling dimensions;
poolDim = DEFAULT_TIMESERIES_POOL_DIMS;
} else {
poolDim = poolingDimensions;
}
} else if (input.rank() == 4) {
//CNN activations
if (poolingDimensions == null) {
//Use default pooling dimensions;
poolDim = DEFAULT_CNN_POOL_DIMS;
} else {
poolDim = poolingDimensions;
}
}
INDArray epsilonNd;
if (maskArray == null) {
//Standard 'full array' global pooling op
epsilonNd = epsilonHelperFullArray(input, epsilon, poolDim);
} else {
if (input.rank() == 3) {
epsilonNd = MaskedReductionUtil.maskedPoolingEpsilonTimeSeries(poolingType, input, maskArray, epsilon, pNorm);
} else if (input.rank() == 4) {
int h = input.size(2);
boolean maskAlongHeight = (h == maskArray.size(1));
epsilonNd = MaskedReductionUtil.maskedPoolingEpsilonCnn(poolingType, input, maskArray, epsilon, maskAlongHeight, pNorm);
} else {
throw new UnsupportedOperationException();
}
}
return new Pair<>(retGradient, epsilonNd);
}
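As a configuration sketch (builder API assumed from the same DL4J generation, not taken from this file): a global pooling layer typically sits between a recurrent or convolutional layer and the output layer, collapsing [minibatch, nOut, timeLength] or [minibatch, depth, height, width] activations to the 2d [minibatch, nOut] shape that the epsilon handling above expects.
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .list()
    .layer(0, new GravesLSTM.Builder().nIn(50).nOut(100).build())
    .layer(1, new GlobalPoolingLayer.Builder(PoolingType.MAX).build()) //pools over the time dimension by default
    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).nIn(100).nOut(10).build())
    .build();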
use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
the class LossLayer method computeGradientAndScore.
@Override
public void computeGradientAndScore() {
if (input == null || labels == null)
return;
INDArray preOut = input;
Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOut);
this.gradient = pair.getFirst();
score = computeScore(fullNetworkL1, fullNetworkL2, true);
}
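A LossLayer applies a loss function to its input without adding any parameters of its own, so the Gradient stored above carries no weight entries; it exists so that the epsilon from getGradientsAndDelta(preOut) can be passed back to the preceding layer. A hedged configuration sketch (builder API assumed, not from this file) of the usual pattern, where the previous layer already produces the final activations:
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .list()
    .layer(0, new DenseLayer.Builder().nIn(784).nOut(10)
            .activation(Activation.SOFTMAX).build()) //produces the final probabilities
    .layer(1, new LossLayer.Builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT).build()) //no params; contributes only score and epsilon
    .build();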